diff --git a/dd-java-agent/agent-profiling/profiling-controller-openjdk/src/main/java/com/datadog/profiling/controller/openjdk/OpenJdkController.java b/dd-java-agent/agent-profiling/profiling-controller-openjdk/src/main/java/com/datadog/profiling/controller/openjdk/OpenJdkController.java index dbe3c15bb84..c1ec6e12844 100644 --- a/dd-java-agent/agent-profiling/profiling-controller-openjdk/src/main/java/com/datadog/profiling/controller/openjdk/OpenJdkController.java +++ b/dd-java-agent/agent-profiling/profiling-controller-openjdk/src/main/java/com/datadog/profiling/controller/openjdk/OpenJdkController.java @@ -18,8 +18,6 @@ import static com.datadog.profiling.controller.ProfilingSupport.*; import static com.datadog.profiling.controller.ProfilingSupport.isObjectCountParallelized; import static datadog.trace.api.Platform.isJavaVersionAtLeast; -import static datadog.trace.api.config.ProfilingConfig.PROFILING_DEBUG_CLEANUP_REPO; -import static datadog.trace.api.config.ProfilingConfig.PROFILING_DEBUG_CLEANUP_REPO_DEFAULT; import static datadog.trace.api.config.ProfilingConfig.PROFILING_HEAP_HISTOGRAM_ENABLED; import static datadog.trace.api.config.ProfilingConfig.PROFILING_HEAP_HISTOGRAM_ENABLED_DEFAULT; import static datadog.trace.api.config.ProfilingConfig.PROFILING_HEAP_HISTOGRAM_MODE; @@ -33,6 +31,7 @@ import com.datadog.profiling.controller.ConfigurationException; import com.datadog.profiling.controller.Controller; import com.datadog.profiling.controller.ControllerContext; +import com.datadog.profiling.controller.TempLocationManager; import com.datadog.profiling.controller.jfr.JFRAccess; import com.datadog.profiling.controller.jfr.JfpUtils; import com.datadog.profiling.controller.openjdk.events.AvailableProcessorCoresEvent; @@ -42,19 +41,13 @@ import datadog.trace.bootstrap.config.provider.ConfigProvider; import datadog.trace.bootstrap.instrumentation.jfr.backpressure.BackpressureProfiling; import datadog.trace.bootstrap.instrumentation.jfr.exceptions.ExceptionProfiling; -import datadog.trace.util.PidHelper; import de.thetaphi.forbiddenapis.SuppressForbidden; import java.io.IOException; -import java.nio.file.FileVisitResult; -import java.nio.file.FileVisitor; import java.nio.file.Files; import java.nio.file.Path; -import java.nio.file.Paths; -import java.nio.file.attribute.BasicFileAttributes; import java.time.Duration; import java.util.Collections; import java.util.Map; -import java.util.Set; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -92,14 +85,8 @@ public OpenJdkController(final ConfigProvider configProvider) // configure the JFR stackdepth before we try to load any JFR classes int requestedStackDepth = getConfiguredStackDepth(configProvider); this.jfrStackDepthApplied = JFRAccess.instance().setStackDepth(requestedStackDepth); - boolean shouldCleanupJfrRepository = - configProvider.getBoolean( - PROFILING_DEBUG_CLEANUP_REPO, PROFILING_DEBUG_CLEANUP_REPO_DEFAULT); - String jfrRepositoryBase = null; - if (shouldCleanupJfrRepository) { - jfrRepositoryBase = getJfrRepositoryBase(configProvider); - JFRAccess.instance().setBaseLocation(jfrRepositoryBase + "/pid_" + PidHelper.getPid()); - } + String jfrRepositoryBase = getJfrRepositoryBase(configProvider); + JFRAccess.instance().setBaseLocation(jfrRepositoryBase); // Make sure we can load JFR classes before declaring that we have successfully created // factory and can use it. Class.forName("jdk.jfr.Recording"); @@ -112,10 +99,6 @@ public OpenJdkController(final ConfigProvider configProvider) Map recordingSettings; try { - if (shouldCleanupJfrRepository) { - cleanupJfrRepositories(Paths.get(jfrRepositoryBase)); - } - recordingSettings = JfpUtils.readNamedJfpResource( ultraMinimal ? JfpUtils.SAFEPOINTS_JFP : JfpUtils.DEFAULT_JFP); @@ -270,21 +253,27 @@ && isEventEnabled(recordingSettings, "jdk.NativeMethodSample")) { } private static String getJfrRepositoryBase(ConfigProvider configProvider) { - return configProvider.getString( - ProfilingConfig.PROFILING_JFR_REPOSITORY_BASE, - ProfilingConfig.PROFILING_JFR_REPOSITORY_BASE_DEFAULT); - } - - private static void cleanupJfrRepositories(Path repositoryBase) { - try { - Files.walkFileTree(repositoryBase, new JfrCleanupVisitor(repositoryBase)); - } catch (IOException e) { - if (log.isDebugEnabled()) { - log.warn("Unable to cleanup old JFR repositories", e); - } else { - log.warn("Unable to cleanup old JFR repositories"); + String legacy = + configProvider.getString( + ProfilingConfig.PROFILING_JFR_REPOSITORY_BASE, + ProfilingConfig.PROFILING_JFR_REPOSITORY_BASE_DEFAULT); + if (!legacy.equals(ProfilingConfig.PROFILING_JFR_REPOSITORY_BASE_DEFAULT)) { + log.warn( + "The configuration key {} is deprecated. Please use {} instead.", + ProfilingConfig.PROFILING_JFR_REPOSITORY_BASE, + ProfilingConfig.PROFILING_TEMP_DIR); + } + Path repositoryPath = TempLocationManager.getInstance().getTempDir().resolve("jfr"); + if (!Files.exists(repositoryPath)) { + try { + Files.createDirectories(repositoryPath); + } catch (IOException e) { + log.error("Failed to create JFR repository directory: {}", repositoryPath, e); + throw new IllegalStateException( + "Failed to create JFR repository directory: " + repositoryPath, e); } } + return repositoryPath.toString(); } int getMaxSize() { @@ -331,58 +320,4 @@ private int getConfiguredStackDepth(ConfigProvider configProvider) { return configProvider.getInteger( ProfilingConfig.PROFILING_STACKDEPTH, ProfilingConfig.PROFILING_STACKDEPTH_DEFAULT); } - - private static class JfrCleanupVisitor implements FileVisitor { - private boolean shouldClean = false; - - private final Path root; - private final Set pidSet = PidHelper.getJavaPids(); - - JfrCleanupVisitor(Path root) { - this.root = root; - } - - @Override - public FileVisitResult preVisitDirectory(Path dir, BasicFileAttributes attrs) - throws IOException { - if (dir.equals(root)) { - return FileVisitResult.CONTINUE; - } - String fileName = dir.getFileName().toString(); - // the JFR repository directories are under /pid_ - String pid = fileName.startsWith("pid_") ? fileName.substring(4) : null; - shouldClean |= pid != null && !pidSet.contains(pid); - if (shouldClean) { - log.debug("Cleaning JFR repository under {}", dir); - } - return shouldClean ? FileVisitResult.CONTINUE : FileVisitResult.SKIP_SUBTREE; - } - - @Override - public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException { - if (file.toString().toLowerCase().endsWith(".jfr")) { - Files.delete(file); - } - return FileVisitResult.CONTINUE; - } - - @Override - public FileVisitResult visitFileFailed(Path file, IOException exc) throws IOException { - if (log.isDebugEnabled() && file.toString().toLowerCase().endsWith(".jfr")) { - log.debug("Failed to delete file {}", file, exc); - } - return FileVisitResult.CONTINUE; - } - - @Override - public FileVisitResult postVisitDirectory(Path dir, IOException exc) throws IOException { - if (shouldClean) { - Files.delete(dir); - String fileName = dir.getFileName().toString(); - // reset the flag only if we are done cleaning the top-level directory - shouldClean = !fileName.startsWith("pid_"); - } - return FileVisitResult.CONTINUE; - } - } } diff --git a/dd-java-agent/agent-profiling/profiling-controller/src/main/java/com/datadog/profiling/controller/TempLocationManager.java b/dd-java-agent/agent-profiling/profiling-controller/src/main/java/com/datadog/profiling/controller/TempLocationManager.java new file mode 100644 index 00000000000..f82806f4ef0 --- /dev/null +++ b/dd-java-agent/agent-profiling/profiling-controller/src/main/java/com/datadog/profiling/controller/TempLocationManager.java @@ -0,0 +1,379 @@ +package com.datadog.profiling.controller; + +import datadog.trace.api.config.ProfilingConfig; +import datadog.trace.bootstrap.config.provider.ConfigProvider; +import datadog.trace.util.PidHelper; +import java.io.IOException; +import java.nio.file.FileVisitResult; +import java.nio.file.FileVisitor; +import java.nio.file.Files; +import java.nio.file.NoSuchFileException; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.nio.file.attribute.BasicFileAttributes; +import java.nio.file.attribute.PosixFilePermissions; +import java.time.Instant; +import java.time.temporal.ChronoUnit; +import java.util.Set; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * A manager class for temporary locations used by the profiling product. The temporary location is + * keyed by the process ID and allows for cleanup of orphaned temporary files on startup by querying + * the list of running Java processes and cleaning up any temporary locations that do not correspond + * to a running Java process. Also, the temporary location is cleaned up on shutdown. + */ +public final class TempLocationManager { + private static final Logger log = LoggerFactory.getLogger(TempLocationManager.class); + + private static final class SingletonHolder { + private static final TempLocationManager INSTANCE = new TempLocationManager(); + } + + interface CleanupHook extends FileVisitor { + CleanupHook EMPTY = new CleanupHook() {}; + + @Override + default FileVisitResult preVisitDirectory(Path dir, BasicFileAttributes attrs) + throws IOException { + return null; + } + + @Override + default FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException { + return null; + } + + @Override + default FileVisitResult visitFileFailed(Path file, IOException exc) throws IOException { + return null; + } + + @Override + default FileVisitResult postVisitDirectory(Path dir, IOException exc) throws IOException { + return null; + } + + default void onCleanupStart(boolean selfCleanup, long timeout, TimeUnit unit) {} + } + + private class CleanupVisitor implements FileVisitor { + private boolean shouldClean; + + private final Set pidSet = PidHelper.getJavaPids(); + + private final boolean cleanSelf; + private final Instant cutoff; + private final Instant timeoutTarget; + + private boolean terminated; + + CleanupVisitor(boolean cleanSelf, long timeout, TimeUnit unit) { + this.cleanSelf = cleanSelf; + this.cutoff = Instant.now().minus(cutoffSeconds, ChronoUnit.SECONDS); + this.timeoutTarget = + timeout > -1 + ? Instant.now().plus(TimeUnit.MILLISECONDS.convert(timeout, unit), ChronoUnit.MILLIS) + : null; + } + + boolean isTerminated() { + return terminated; + } + + private boolean isTimedOut() { + return timeoutTarget != null && Instant.now().isAfter(timeoutTarget); + } + + @Override + public FileVisitResult preVisitDirectory(Path dir, BasicFileAttributes attrs) + throws IOException { + if (isTimedOut()) { + log.debug("Cleaning task timed out"); + terminated = true; + return FileVisitResult.TERMINATE; + } + cleanupTestHook.preVisitDirectory(dir, attrs); + + if (dir.equals(baseTempDir)) { + return FileVisitResult.CONTINUE; + } + String fileName = dir.getFileName().toString(); + // the JFR repository directories are under /pid_ + String pid = fileName.startsWith("pid_") ? fileName.substring(4) : null; + boolean isSelfPid = pid != null && pid.equals(PidHelper.getPid()); + shouldClean |= (cleanSelf && isSelfPid) || (!cleanSelf && !pidSet.contains(pid)); + if (shouldClean) { + log.debug("Cleaning temporary location {}", dir); + } + return shouldClean ? FileVisitResult.CONTINUE : FileVisitResult.SKIP_SUBTREE; + } + + @Override + public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException { + if (isTimedOut()) { + log.debug("Cleaning task timed out"); + terminated = true; + return FileVisitResult.TERMINATE; + } + cleanupTestHook.visitFile(file, attrs); + try { + if (Files.getLastModifiedTime(file).toInstant().isAfter(cutoff)) { + return FileVisitResult.SKIP_SUBTREE; + } + Files.delete(file); + } catch (NoSuchFileException ignored) { + // another process has already cleaned it; ignore + } + return FileVisitResult.CONTINUE; + } + + @Override + public FileVisitResult visitFileFailed(Path file, IOException exc) throws IOException { + if (isTimedOut()) { + log.debug("Cleaning task timed out"); + terminated = true; + return FileVisitResult.TERMINATE; + } + cleanupTestHook.visitFileFailed(file, exc); + // do not log files/directories removed by another process running concurrently + if (!(exc instanceof NoSuchFileException) && log.isDebugEnabled()) { + log.debug("Failed to delete file {}", file, exc); + } + return FileVisitResult.CONTINUE; + } + + @Override + public FileVisitResult postVisitDirectory(Path dir, IOException exc) throws IOException { + if (isTimedOut()) { + log.debug("Cleaning task timed out"); + terminated = true; + return FileVisitResult.TERMINATE; + } + cleanupTestHook.postVisitDirectory(dir, exc); + if (exc instanceof NoSuchFileException) { + return FileVisitResult.CONTINUE; + } + if (shouldClean) { + try { + Files.delete(dir); + } catch (NoSuchFileException ignored) { + // another process has already cleaned it; ignore + } + String fileName = dir.getFileName().toString(); + // reset the flag only if we are done cleaning the top-level directory + shouldClean = !fileName.startsWith("pid_"); + } + return FileVisitResult.CONTINUE; + } + } + + private final Path baseTempDir; + private final Path tempDir; + private final long cutoffSeconds; + + private final CompletableFuture cleanupTask; + + private final CleanupHook cleanupTestHook; + + /** + * Get the singleton instance of the TempLocationManager. It will run the cleanup task in the + * background. + * + * @return the singleton instance of the TempLocationManager + */ + public static TempLocationManager getInstance() { + return getInstance(false); + } + + /** + * Get the singleton instance of the TempLocationManager. + * + * @param waitForCleanup if true, wait for the cleanup task to finish before returning + * @return the singleton instance of the TempLocationManager + */ + static TempLocationManager getInstance(boolean waitForCleanup) { + TempLocationManager instance = SingletonHolder.INSTANCE; + if (waitForCleanup) { + try { + instance.waitForCleanup(5, TimeUnit.SECONDS); + } catch (TimeoutException ignored) { + + } + } + return instance; + } + + private TempLocationManager() { + this(ConfigProvider.getInstance()); + } + + TempLocationManager(ConfigProvider configProvider) { + this(configProvider, CleanupHook.EMPTY); + } + + TempLocationManager(ConfigProvider configProvider, CleanupHook testHook) { + cleanupTestHook = testHook; + + // In order to avoid racy attempts to clean up files which are currently being processed in a + // JVM which is being shut down (the JVMs far in the shutdown routine may not be reported by + // 'jps' but still can be eg. processing JFR chunks) we will not clean up any files not older + // than '2 * PROFILING_UPLOAD_PERIOD' seconds. + // The reasoning is that even if the file is created immediately at JVM startup once it is + // 'PROFILING_UPLOAD_PERIOD' seconds old it gets processed to upload the final profile data and + // this processing will not take longer than another `PROFILING_UPLOAD_PERIOD' seconds. + // This is just an assumption but as long as the profiled application is working normally (eg. + // OS is not stalling) this assumption will hold. + cutoffSeconds = + configProvider.getLong( + ProfilingConfig.PROFILING_UPLOAD_PERIOD, + ProfilingConfig.PROFILING_UPLOAD_PERIOD_DEFAULT); + Path configuredTempDir = + Paths.get( + configProvider.getString( + ProfilingConfig.PROFILING_TEMP_DIR, ProfilingConfig.PROFILING_TEMP_DIR_DEFAULT)); + if (!Files.exists(configuredTempDir)) { + log.warn( + "Base temp directory, as defined in '" + + ProfilingConfig.PROFILING_TEMP_DIR + + "' does not exist: " + + configuredTempDir); + throw new IllegalStateException( + "Base temp directory, as defined in '" + + ProfilingConfig.PROFILING_TEMP_DIR + + "' does not exist: " + + configuredTempDir); + } + + String pid = PidHelper.getPid(); + + baseTempDir = configuredTempDir.resolve("ddprof"); + baseTempDir.toFile().deleteOnExit(); + + tempDir = baseTempDir.resolve("pid_" + pid); + cleanupTask = CompletableFuture.runAsync(() -> cleanup(false)); + + Thread selfCleanup = + new Thread( + () -> { + try { + waitForCleanup(1, TimeUnit.SECONDS); + } catch (TimeoutException e) { + log.info( + "Cleanup task timed out. {} temp directory might not have been cleaned up properly", + tempDir); + } finally { + cleanup(true); + } + }, + "Temp Location Manager Cleanup"); + Runtime.getRuntime().addShutdownHook(selfCleanup); + } + + /** + * Get the temporary directory for the current process. The directory will be removed at JVM exit. + * + * @return the temporary directory for the current process + */ + public Path getTempDir() { + return getTempDir(null); + } + + /** + * Get the temporary subdirectory for the current process. The directory will be removed at JVM + * exit. + * + * @param subPath the relative subdirectory path, may be {@literal null} + * @return the temporary subdirectory for the current process + */ + public Path getTempDir(Path subPath) { + return getTempDir(subPath, true); + } + + /** + * Get the temporary subdirectory for the current process. + * + * @param subPath the relative subdirectory path, may be {@literal null} + * @param create if true, create the directory if it does not exist + * @return the temporary directory for the current process + * @throws IllegalStateException if the directory could not be created + */ + public Path getTempDir(Path subPath, boolean create) { + Path rslt = + subPath != null && !subPath.toString().isEmpty() ? tempDir.resolve(subPath) : tempDir; + if (create && !Files.exists(rslt)) { + try { + Files.createDirectories( + rslt, + PosixFilePermissions.asFileAttribute(PosixFilePermissions.fromString("rwx------"))); + } catch (Exception e) { + log.warn("Failed to create temp directory: {}", tempDir, e); + throw new IllegalStateException("Failed to create temp directory: " + tempDir, e); + } + } + return rslt; + } + + /** + * Walk the base temp directory recursively and remove all inactive per-process entries. No + * timeout is applied. + * + * @param cleanSelf {@literal true} will call only this process' temp directory, {@literal false} + * only the other processes will be cleaned up + * @return {@literal true} if cleanup fully succeeded or {@literal false} otherwise (eg. + * interruption etc.) + */ + boolean cleanup(boolean cleanSelf) { + return cleanup(cleanSelf, -1, TimeUnit.SECONDS); + } + + /** + * Walk the base temp directory recursively and remove all inactive per-process entries + * + * @param cleanSelf {@literal true} will call only this process' temp directory, {@literal false} + * only the other processes will be cleaned up + * @param timeout the task timeout; may be {@literal -1} to signal no timeout + * @param unit the task timeout unit + * @return {@literal true} if cleanup fully succeeded or {@literal false} otherwise (timeout, + * interruption etc.) + */ + boolean cleanup(boolean cleanSelf, long timeout, TimeUnit unit) { + try { + cleanupTestHook.onCleanupStart(cleanSelf, timeout, unit); + CleanupVisitor visitor = new CleanupVisitor(cleanSelf, timeout, unit); + Files.walkFileTree(baseTempDir, visitor); + return !visitor.isTerminated(); + } catch (IOException e) { + if (log.isDebugEnabled()) { + log.warn("Unable to cleanup temp location {}", baseTempDir, e); + } else { + log.warn("Unable to cleanup temp location {}", baseTempDir); + } + } + return false; + } + + // accessible for tests + void waitForCleanup(long timeout, TimeUnit unit) throws TimeoutException { + try { + cleanupTask.get(timeout, unit); + } catch (InterruptedException e) { + cleanupTask.cancel(true); + Thread.currentThread().interrupt(); + } catch (TimeoutException e) { + cleanupTask.cancel(true); + throw e; + } catch (ExecutionException e) { + if (log.isDebugEnabled()) { + log.debug("Failed to cleanup temp directory: {}", tempDir, e); + } else { + log.debug("Failed to cleanup temp directory: {}", tempDir); + } + } + } +} diff --git a/dd-java-agent/agent-profiling/profiling-controller/src/test/java/com/datadog/profiling/controller/TempLocationManagerTest.java b/dd-java-agent/agent-profiling/profiling-controller/src/test/java/com/datadog/profiling/controller/TempLocationManagerTest.java new file mode 100644 index 00000000000..fdc839ffaae --- /dev/null +++ b/dd-java-agent/agent-profiling/profiling-controller/src/test/java/com/datadog/profiling/controller/TempLocationManagerTest.java @@ -0,0 +1,258 @@ +package com.datadog.profiling.controller; + +import static org.junit.jupiter.api.Assertions.*; + +import datadog.trace.api.config.ProfilingConfig; +import datadog.trace.bootstrap.config.provider.ConfigProvider; +import datadog.trace.util.PidHelper; +import java.io.IOException; +import java.nio.file.FileVisitResult; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.nio.file.attribute.BasicFileAttributes; +import java.nio.file.attribute.PosixFilePermissions; +import java.util.ArrayList; +import java.util.List; +import java.util.Properties; +import java.util.UUID; +import java.util.concurrent.Phaser; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.locks.LockSupport; +import java.util.stream.Stream; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; +import org.junit.jupiter.params.provider.ValueSource; + +public class TempLocationManagerTest { + @ParameterizedTest + @ValueSource(strings = {"", "test1"}) + void testDefault(String subPath) throws Exception { + TempLocationManager tempLocationManager = TempLocationManager.getInstance(true); + Path tempDir = tempLocationManager.getTempDir(Paths.get(subPath)); + assertNotNull(tempDir); + assertTrue(Files.exists(tempDir)); + assertTrue(Files.isDirectory(tempDir)); + assertTrue(Files.isWritable(tempDir)); + assertTrue(Files.isReadable(tempDir)); + assertTrue(Files.isExecutable(tempDir)); + assertTrue(tempDir.toString().contains("pid_" + PidHelper.getPid())); + } + + @ParameterizedTest + @ValueSource(strings = {"", "test1"}) + void testFromConfig(String subPath) throws Exception { + Path myDir = + Files.createTempDirectory( + "ddprof-test-", + PosixFilePermissions.asFileAttribute(PosixFilePermissions.fromString("rwx------"))); + myDir.toFile().deleteOnExit(); + Properties props = new Properties(); + props.put(ProfilingConfig.PROFILING_TEMP_DIR, myDir.toString()); + ConfigProvider configProvider = ConfigProvider.withPropertiesOverride(props); + TempLocationManager tempLocationManager = new TempLocationManager(configProvider); + Path tempDir = tempLocationManager.getTempDir(Paths.get(subPath)); + assertNotNull(tempDir); + assertTrue(tempDir.toString().startsWith(myDir.toString())); + } + + @Test + void testFromConfigInvalid() { + Path myDir = Paths.get(System.getProperty("java.io.tmpdir"), UUID.randomUUID().toString()); + // do not create the directory - it should trigger an exception + Properties props = new Properties(); + props.put(ProfilingConfig.PROFILING_TEMP_DIR, myDir.toString()); + ConfigProvider configProvider = ConfigProvider.withPropertiesOverride(props); + assertThrows(IllegalStateException.class, () -> new TempLocationManager(configProvider)); + } + + @Test + void testFromConfigNotWritable() throws Exception { + Path myDir = + Files.createTempDirectory( + "ddprof-test-", + PosixFilePermissions.asFileAttribute(PosixFilePermissions.fromString("r-x------"))); + myDir.toFile().deleteOnExit(); + Properties props = new Properties(); + props.put(ProfilingConfig.PROFILING_TEMP_DIR, myDir.toString()); + ConfigProvider configProvider = ConfigProvider.withPropertiesOverride(props); + TempLocationManager tempLocationManager = new TempLocationManager(configProvider); + assertThrows(IllegalStateException.class, tempLocationManager::getTempDir); + } + + @ParameterizedTest + @ValueSource(strings = {"", "test1"}) + void testCleanup(String subPath) throws Exception { + Path myDir = + Files.createTempDirectory( + "ddprof-test-", + PosixFilePermissions.asFileAttribute(PosixFilePermissions.fromString("rwx------"))); + myDir.toFile().deleteOnExit(); + Properties props = new Properties(); + props.put(ProfilingConfig.PROFILING_TEMP_DIR, myDir.toString()); + props.put( + ProfilingConfig.PROFILING_UPLOAD_PERIOD, + "0"); // to force immediate cleanup; must be a string value! + ConfigProvider configProvider = ConfigProvider.withPropertiesOverride(props); + TempLocationManager tempLocationManager = new TempLocationManager(configProvider); + Path tempDir = tempLocationManager.getTempDir(Paths.get(subPath)); + assertNotNull(tempDir); + + // fake temp location + Path fakeTempDir = tempDir.getParent(); + while (fakeTempDir != null && !fakeTempDir.endsWith("ddprof")) { + fakeTempDir = fakeTempDir.getParent(); + } + fakeTempDir = fakeTempDir.resolve("pid_0000"); + Files.createDirectories(fakeTempDir); + Path tmpFile = Files.createFile(fakeTempDir.resolve("test.txt")); + tmpFile.toFile().deleteOnExit(); // make sure this is deleted at exit + fakeTempDir.toFile().deleteOnExit(); // also this one + tempLocationManager.cleanup(false); + // fake temp location should be deleted + // real temp location should be kept + assertFalse(Files.exists(fakeTempDir)); + assertTrue(Files.exists(tempDir)); + } + + @ParameterizedTest + @ValueSource(strings = {"preVisitDirectory", "visitFile", "postVisitDirectory"}) + void testConcurrentCleanup(String section) throws Exception { + /* This test simulates concurrent cleanup + It utilizes a special hook to create synchronization points in the filetree walking routine, + allowing to delete the files at various points of execution. + The test makes sure that the cleanup is not interrupted and the file and directory being deleted + stays deleted. + */ + Path baseDir = + Files.createTempDirectory( + "ddprof-test-", + PosixFilePermissions.asFileAttribute(PosixFilePermissions.fromString("rwx------"))); + + Path fakeTempDir = baseDir.resolve("ddprof/pid_1234/scratch"); + Files.createDirectories(fakeTempDir); + Path fakeTempFile = fakeTempDir.resolve("libxxx.so"); + Files.createFile(fakeTempFile); + + fakeTempDir.toFile().deleteOnExit(); + fakeTempFile.toFile().deleteOnExit(); + + Phaser phaser = new Phaser(2); + + TempLocationManager.CleanupHook blocker = + new TempLocationManager.CleanupHook() { + @Override + public FileVisitResult preVisitDirectory(Path dir, BasicFileAttributes attrs) + throws IOException { + if (section.equals("preVisitDirectory") && dir.equals(fakeTempDir)) { + phaser.arriveAndAwaitAdvance(); + phaser.arriveAndAwaitAdvance(); + } + return null; + } + + @Override + public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) + throws IOException { + if (section.equals("visitFile") && file.equals(fakeTempFile)) { + phaser.arriveAndAwaitAdvance(); + phaser.arriveAndAwaitAdvance(); + } + return null; + } + + @Override + public FileVisitResult postVisitDirectory(Path dir, IOException exc) throws IOException { + if (section.equals("postVisitDirectory") && dir.equals(fakeTempDir)) { + phaser.arriveAndAwaitAdvance(); + phaser.arriveAndAwaitAdvance(); + } + return null; + } + }; + + TempLocationManager mgr = instance(baseDir, blocker); + + // wait for the cleanup start + phaser.arriveAndAwaitAdvance(); + Files.deleteIfExists(fakeTempFile); + phaser.arriveAndAwaitAdvance(); + mgr.waitForCleanup(30, TimeUnit.SECONDS); + + assertFalse(Files.exists(fakeTempFile)); + assertFalse(Files.exists(fakeTempDir)); + } + + @ParameterizedTest + @MethodSource("timeoutTestArguments") + void testCleanupWithTimeout(boolean selfCleanup, String section) throws Exception { + long timeoutMs = 500; + TempLocationManager.CleanupHook delayer = + new TempLocationManager.CleanupHook() { + @Override + public FileVisitResult preVisitDirectory(Path dir, BasicFileAttributes attrs) + throws IOException { + if (section.equals("preVisitDirectory")) { + LockSupport.parkNanos(timeoutMs * 1_000_000); + } + return TempLocationManager.CleanupHook.super.preVisitDirectory(dir, attrs); + } + + @Override + public FileVisitResult visitFileFailed(Path file, IOException exc) throws IOException { + if (section.equals("visitFileFailed")) { + LockSupport.parkNanos(timeoutMs * 1_000_000); + } + return TempLocationManager.CleanupHook.super.visitFileFailed(file, exc); + } + + @Override + public FileVisitResult postVisitDirectory(Path dir, IOException exc) throws IOException { + if (section.equals("postVisitDirectory")) { + LockSupport.parkNanos(timeoutMs * 1_000_000); + } + return TempLocationManager.CleanupHook.super.postVisitDirectory(dir, exc); + } + + @Override + public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) + throws IOException { + if (section.equals("visitFile")) { + LockSupport.parkNanos(timeoutMs * 1_000_000); + } + return TempLocationManager.CleanupHook.super.visitFile(file, attrs); + } + }; + Path baseDir = + Files.createTempDirectory( + "ddprof-test-", + PosixFilePermissions.asFileAttribute(PosixFilePermissions.fromString("rwx------"))); + TempLocationManager instance = instance(baseDir, delayer); + boolean rslt = instance.cleanup(selfCleanup, timeoutMs, TimeUnit.MILLISECONDS); + assertTrue(rslt); + } + + private static Stream timeoutTestArguments() { + List argumentsList = new ArrayList<>(); + for (boolean selfCleanup : new boolean[] {true, false}) { + for (String intercepted : + new String[] {"preVisitDirectory", "visitFile", "postVisitDirectory"}) { + argumentsList.add(Arguments.of(selfCleanup, intercepted)); + } + } + return argumentsList.stream(); + } + + private TempLocationManager instance(Path baseDir, TempLocationManager.CleanupHook cleanupHook) + throws IOException { + Properties props = new Properties(); + props.put(ProfilingConfig.PROFILING_TEMP_DIR, baseDir.toString()); + props.put( + ProfilingConfig.PROFILING_UPLOAD_PERIOD, + "0"); // to force immediate cleanup; must be a string value! + + return new TempLocationManager(ConfigProvider.withPropertiesOverride(props), cleanupHook); + } +} diff --git a/dd-java-agent/agent-profiling/profiling-ddprof/src/main/java/com/datadog/profiling/ddprof/DatadogProfiler.java b/dd-java-agent/agent-profiling/profiling-ddprof/src/main/java/com/datadog/profiling/ddprof/DatadogProfiler.java index c4c49d82731..5c9fbba38b7 100644 --- a/dd-java-agent/agent-profiling/profiling-ddprof/src/main/java/com/datadog/profiling/ddprof/DatadogProfiler.java +++ b/dd-java-agent/agent-profiling/profiling-ddprof/src/main/java/com/datadog/profiling/ddprof/DatadogProfiler.java @@ -33,6 +33,7 @@ import static datadog.trace.api.config.ProfilingConfig.PROFILING_QUEUEING_TIME_THRESHOLD_MILLIS_DEFAULT; import com.datadog.profiling.controller.OngoingRecording; +import com.datadog.profiling.controller.TempLocationManager; import com.datadog.profiling.utils.ProfilingMode; import com.datadoghq.profiler.ContextSetter; import com.datadoghq.profiler.JavaProfiler; @@ -43,6 +44,7 @@ import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; +import java.nio.file.attribute.PosixFilePermissions; import java.util.ArrayList; import java.util.EnumSet; import java.util.List; @@ -106,6 +108,8 @@ public static DatadogProfiler newInstance(ConfigProvider configProvider) { private final long queueTimeThresholdMillis; + private final Path recordingsPath; + private DatadogProfiler(ConfigProvider configProvider) { this(configProvider, getContextAttributes(configProvider)); } @@ -148,6 +152,19 @@ private DatadogProfiler(ConfigProvider configProvider) { configProvider.getLong( PROFILING_QUEUEING_TIME_THRESHOLD_MILLIS, PROFILING_QUEUEING_TIME_THRESHOLD_MILLIS_DEFAULT); + + this.recordingsPath = TempLocationManager.getInstance().getTempDir().resolve("recordings"); + if (!Files.exists(recordingsPath)) { + try { + Files.createDirectories( + recordingsPath, + PosixFilePermissions.asFileAttribute(PosixFilePermissions.fromString("rwx------"))); + } catch (IOException e) { + log.warn("Failed to create recordings directory: {}", recordingsPath, e); + throw new IllegalStateException( + "Failed to create recordings directory: " + recordingsPath, e); + } + } } void addThread() { @@ -212,7 +229,7 @@ String executeProfilerCmd(String cmd) throws IOException { Path newRecording() throws IOException, IllegalStateException { if (recordingFlag.compareAndSet(false, true)) { - Path recFile = Files.createTempFile("dd-profiler-", ".jfr"); + Path recFile = Files.createTempFile(recordingsPath, "dd-profiler-", ".jfr"); String cmd = cmdStartProfiling(recFile); try { String rslt = executeProfilerCmd(cmd); diff --git a/dd-java-agent/agent-profiling/profiling-ddprof/src/main/java/com/datadog/profiling/ddprof/JavaProfilerLoader.java b/dd-java-agent/agent-profiling/profiling-ddprof/src/main/java/com/datadog/profiling/ddprof/JavaProfilerLoader.java index a2b24925b54..654bd63f904 100644 --- a/dd-java-agent/agent-profiling/profiling-ddprof/src/main/java/com/datadog/profiling/ddprof/JavaProfilerLoader.java +++ b/dd-java-agent/agent-profiling/profiling-ddprof/src/main/java/com/datadog/profiling/ddprof/JavaProfilerLoader.java @@ -1,8 +1,12 @@ package com.datadog.profiling.ddprof; +import com.datadog.profiling.controller.TempLocationManager; import com.datadoghq.profiler.JavaProfiler; import datadog.trace.api.config.ProfilingConfig; import datadog.trace.bootstrap.config.provider.ConfigProvider; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.attribute.PosixFilePermissions; /** * Only loading the profiler itself needs to be protected as a singleton. Separating the loading @@ -17,12 +21,20 @@ public class JavaProfilerLoader { Throwable reasonNotLoaded = null; try { ConfigProvider configProvider = ConfigProvider.getInstance(); + String scratch = configProvider.getString(ProfilingConfig.PROFILING_DATADOG_PROFILER_SCRATCH); + if (scratch == null) { + Path scratchPath = TempLocationManager.getInstance().getTempDir().resolve("scratch"); + if (!Files.exists(scratchPath)) { + Files.createDirectories( + scratchPath, + PosixFilePermissions.asFileAttribute(PosixFilePermissions.fromString("rwxr-xr-x"))); + } + scratch = scratchPath.toString(); + } profiler = JavaProfiler.getInstance( configProvider.getString(ProfilingConfig.PROFILING_DATADOG_PROFILER_LIBPATH), - configProvider.getString( - ProfilingConfig.PROFILING_DATADOG_PROFILER_SCRATCH, - ProfilingConfig.PROFILING_DATADOG_PROFILER_SCRATCH_DEFAULT)); + scratch); // sanity test - force load Datadog profiler to catch it not being available early profiler.execute("status"); } catch (Throwable t) { diff --git a/dd-trace-api/src/main/java/datadog/trace/api/config/ProfilingConfig.java b/dd-trace-api/src/main/java/datadog/trace/api/config/ProfilingConfig.java index 4d5c10f7fb1..28f7012edd3 100644 --- a/dd-trace-api/src/main/java/datadog/trace/api/config/ProfilingConfig.java +++ b/dd-trace-api/src/main/java/datadog/trace/api/config/ProfilingConfig.java @@ -72,9 +72,6 @@ public final class ProfilingConfig { public static final String PROFILING_JFR_REPOSITORY_BASE_DEFAULT = System.getProperty("java.io.tmpdir") + "/dd/jfr"; - public static final String PROFILING_JFR_REPOSITORY_CLEANUP = "profiling.jfr.repository.cleanup"; - public static final boolean PROFILING_JFR_REPOSITORY_CLEANUP_DEFAULT = true; - public static final String PROFILING_DATADOG_PROFILER_ENABLED = "profiling.ddprof.enabled"; public static final String PROFILING_DIRECT_ALLOCATION_ENABLED = @@ -87,8 +84,7 @@ public final class ProfilingConfig { // Java profiler lib needs to be extracted from JAR and placed into the scratch location // By default the scratch is the os temp directory but can be overridden by user public static final String PROFILING_DATADOG_PROFILER_SCRATCH = "profiling.ddprof.scratch"; - public static final String PROFILING_DATADOG_PROFILER_SCRATCH_DEFAULT = - System.getProperty("java.io.tmpdir"); + public static final String PROFILING_DATADOG_PROFILER_LIBPATH = "profiling.ddprof.debug.lib"; public static final String PROFILING_DATADOG_PROFILER_ALLOC_ENABLED = "profiling.ddprof.alloc.enabled"; @@ -182,6 +178,9 @@ public final class ProfilingConfig { public static final String PROFILING_UPLOAD_SUMMARY_ON_413 = "profiling.upload.summary-on-413"; public static final boolean PROFILING_UPLOAD_SUMMARY_ON_413_DEFAULT = false; + public static final String PROFILING_TEMP_DIR = "profiling.tempdir"; + public static final String PROFILING_TEMP_DIR_DEFAULT = System.getProperty("java.io.tmpdir"); + // Not intended for production use public static final String PROFILING_AGENTLESS = "profiling.agentless"; public static final boolean PROFILING_AGENTLESS_DEFAULT = false; @@ -192,9 +191,6 @@ public final class ProfilingConfig { public static final String PROFILING_DEBUG_DUMP_PATH = "profiling.debug.dump_path"; public static final String PROFILING_DEBUG_JFR_DISABLED = "profiling.debug.jfr.disabled"; - public static final String PROFILING_DEBUG_CLEANUP_REPO = "profiling.debug.cleanup.jfr.repo"; - public static final boolean PROFILING_DEBUG_CLEANUP_REPO_DEFAULT = false; - public static final String PROFILING_CONTEXT_ATTRIBUTES = "profiling.context.attributes"; public static final String PROFILING_CONTEXT_ATTRIBUTES_SPAN_NAME_ENABLED = diff --git a/internal-api/src/main/java/datadog/trace/util/PidHelper.java b/internal-api/src/main/java/datadog/trace/util/PidHelper.java index eacce63374b..4121d9da68d 100644 --- a/internal-api/src/main/java/datadog/trace/util/PidHelper.java +++ b/internal-api/src/main/java/datadog/trace/util/PidHelper.java @@ -5,10 +5,12 @@ import datadog.trace.context.TraceScope; import de.thetaphi.forbiddenapis.SuppressForbidden; import java.io.BufferedReader; +import java.io.IOException; import java.io.InputStreamReader; import java.lang.management.ManagementFactory; import java.util.Collections; import java.util.Set; +import java.util.concurrent.CompletableFuture; import java.util.concurrent.TimeUnit; import java.util.function.Supplier; import org.slf4j.Logger; @@ -70,22 +72,33 @@ public static Set getJavaPids() { ProcessBuilder pb = new ProcessBuilder("jps"); try (TraceScope ignored = AgentTracer.get().muteTracing()) { Process p = pb.start(); - if (p.waitFor(500, TimeUnit.MILLISECONDS)) { + // start draining the subcommand's pipes asynchronously to avoid flooding them + CompletableFuture> collecting = + CompletableFuture.supplyAsync( + () -> { + try (BufferedReader br = + new BufferedReader(new InputStreamReader(p.getInputStream()))) { + return br.lines() + .filter(l -> !l.contains("jps")) + .map( + l -> { + int idx = l.indexOf(' '); + return l.substring(0, idx); + }) + .collect(java.util.stream.Collectors.toSet()); + } catch (IOException e) { + log.debug("Unable to list java processes via 'jps'", e); + return Collections.emptySet(); + } + }); + if (p.waitFor(1200, TimeUnit.MILLISECONDS)) { if (p.exitValue() == 0) { - try (BufferedReader br = new BufferedReader(new InputStreamReader(p.getInputStream()))) { - return br.lines() - .filter(l -> !l.contains("jps")) - .map( - l -> { - int idx = l.indexOf(' '); - return l.substring(0, idx); - }) - .collect(java.util.stream.Collectors.toSet()); - } + return collecting.get(); } else { log.debug("Execution of 'jps' failed with exit code {}", p.exitValue()); } } else { + p.destroyForcibly(); log.debug("Execution of 'jps' timed out"); } } catch (Exception e) {