Skip to content

Commit 28c0443

Browse files
authored
TempLocationManager Fixes and Improvements (#8191)
* Remove CompletableFuture from TempLocationManager * Do not run the cleanup if there is nothing to clean up * Exclude JFR repository from temp files self-cleanup
1 parent 82e3386 commit 28c0443

File tree

2 files changed

+121
-39
lines changed

2 files changed

+121
-39
lines changed

dd-java-agent/agent-profiling/profiling-controller/src/main/java/com/datadog/profiling/controller/TempLocationManager.java

Lines changed: 77 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
import datadog.trace.api.config.ProfilingConfig;
66
import datadog.trace.bootstrap.config.provider.ConfigProvider;
7+
import datadog.trace.util.AgentTaskScheduler;
78
import datadog.trace.util.PidHelper;
89
import java.io.IOException;
910
import java.nio.file.FileVisitResult;
@@ -17,10 +18,10 @@
1718
import java.time.Instant;
1819
import java.time.temporal.ChronoUnit;
1920
import java.util.Set;
20-
import java.util.concurrent.CompletableFuture;
21-
import java.util.concurrent.ExecutionException;
21+
import java.util.concurrent.CountDownLatch;
2222
import java.util.concurrent.TimeUnit;
23-
import java.util.concurrent.TimeoutException;
23+
import java.util.regex.Pattern;
24+
import java.util.stream.Stream;
2425
import org.slf4j.Logger;
2526
import org.slf4j.LoggerFactory;
2627

@@ -32,6 +33,9 @@
3233
*/
3334
public final class TempLocationManager {
3435
private static final Logger log = LoggerFactory.getLogger(TempLocationManager.class);
36+
private static final Pattern JFR_DIR_PATTERN =
37+
Pattern.compile("\\d{4}_\\d{2}_\\d{2}_\\d{2}_\\d{2}_\\d{2}_\\d{6}");
38+
private static final String TEMPDIR_PREFIX = "pid_";
3539

3640
private static final class SingletonHolder {
3741
private static final TempLocationManager INSTANCE = new TempLocationManager();
@@ -64,7 +68,7 @@ default FileVisitResult postVisitDirectory(Path dir, IOException exc) throws IOE
6468
default void onCleanupStart(boolean selfCleanup, long timeout, TimeUnit unit) {}
6569
}
6670

67-
private class CleanupVisitor implements FileVisitor<Path> {
71+
private final class CleanupVisitor implements FileVisitor<Path> {
6872
private boolean shouldClean;
6973

7074
private final Set<String> pidSet = PidHelper.getJavaPids();
@@ -100,14 +104,19 @@ public FileVisitResult preVisitDirectory(Path dir, BasicFileAttributes attrs)
100104
terminated = true;
101105
return FileVisitResult.TERMINATE;
102106
}
107+
if (cleanSelf && JFR_DIR_PATTERN.matcher(dir.getFileName().toString()).matches()) {
108+
// do not delete JFR repository on 'self-cleanup' - it conflicts with the JFR's own cleanup
109+
return FileVisitResult.SKIP_SUBTREE;
110+
}
111+
103112
cleanupTestHook.preVisitDirectory(dir, attrs);
104113

105114
if (dir.equals(baseTempDir)) {
106115
return FileVisitResult.CONTINUE;
107116
}
108117
String fileName = dir.getFileName().toString();
109118
// the JFR repository directories are under <basedir>/pid_<pid>
110-
String pid = fileName.startsWith("pid_") ? fileName.substring(4) : null;
119+
String pid = fileName.startsWith(TEMPDIR_PREFIX) ? fileName.substring(4) : null;
111120
boolean isSelfPid = pid != null && pid.equals(PidHelper.getPid());
112121
shouldClean |= cleanSelf ? isSelfPid : !isSelfPid && !pidSet.contains(pid);
113122
if (shouldClean) {
@@ -169,18 +178,43 @@ public FileVisitResult postVisitDirectory(Path dir, IOException exc) throws IOEx
169178
}
170179
String fileName = dir.getFileName().toString();
171180
// reset the flag only if we are done cleaning the top-level directory
172-
shouldClean = !fileName.startsWith("pid_");
181+
shouldClean = !fileName.startsWith(TEMPDIR_PREFIX);
173182
}
174183
return FileVisitResult.CONTINUE;
175184
}
176185
}
177186

187+
private final class CleanupTask implements Runnable {
188+
private final CountDownLatch latch = new CountDownLatch(1);
189+
private volatile Throwable throwable = null;
190+
191+
@Override
192+
public void run() {
193+
try {
194+
cleanup(false);
195+
} catch (OutOfMemoryError oom) {
196+
throw oom;
197+
} catch (Throwable t) {
198+
throwable = t;
199+
} finally {
200+
latch.countDown();
201+
}
202+
}
203+
204+
boolean await(long timeout, TimeUnit unit) throws Throwable {
205+
boolean ret = latch.await(timeout, unit);
206+
if (throwable != null) {
207+
throw throwable;
208+
}
209+
return ret;
210+
}
211+
}
212+
178213
private final Path baseTempDir;
179214
private final Path tempDir;
180215
private final long cutoffSeconds;
181216

182-
private final CompletableFuture<Void> cleanupTask;
183-
217+
private final CleanupTask cleanupTask = new CleanupTask();
184218
private final CleanupHook cleanupTestHook;
185219

186220
/**
@@ -202,11 +236,7 @@ public static TempLocationManager getInstance() {
202236
static TempLocationManager getInstance(boolean waitForCleanup) {
203237
TempLocationManager instance = SingletonHolder.INSTANCE;
204238
if (waitForCleanup) {
205-
try {
206-
instance.waitForCleanup(5, TimeUnit.SECONDS);
207-
} catch (TimeoutException ignored) {
208-
209-
}
239+
instance.waitForCleanup(5, TimeUnit.SECONDS);
210240
}
211241
return instance;
212242
}
@@ -216,10 +246,11 @@ private TempLocationManager() {
216246
}
217247

218248
TempLocationManager(ConfigProvider configProvider) {
219-
this(configProvider, CleanupHook.EMPTY);
249+
this(configProvider, true, CleanupHook.EMPTY);
220250
}
221251

222-
TempLocationManager(ConfigProvider configProvider, CleanupHook testHook) {
252+
TempLocationManager(
253+
ConfigProvider configProvider, boolean runStartupCleanup, CleanupHook testHook) {
223254
cleanupTestHook = testHook;
224255

225256
// In order to avoid racy attempts to clean up files which are currently being processed in a
@@ -258,21 +289,21 @@ private TempLocationManager() {
258289
baseTempDir = configuredTempDir.resolve("ddprof");
259290
baseTempDir.toFile().deleteOnExit();
260291

261-
tempDir = baseTempDir.resolve("pid_" + pid);
262-
cleanupTask = CompletableFuture.runAsync(() -> cleanup(false));
292+
tempDir = baseTempDir.resolve(TEMPDIR_PREFIX + pid);
293+
if (runStartupCleanup) {
294+
// do not execute the background cleanup task when running in tests
295+
AgentTaskScheduler.INSTANCE.execute(() -> cleanup(false));
296+
}
263297

264298
Thread selfCleanup =
265299
new Thread(
266300
() -> {
267-
try {
268-
waitForCleanup(1, TimeUnit.SECONDS);
269-
} catch (TimeoutException e) {
301+
if (!waitForCleanup(1, TimeUnit.SECONDS)) {
270302
log.info(
271303
"Cleanup task timed out. {} temp directory might not have been cleaned up properly",
272304
tempDir);
273-
} finally {
274-
cleanup(true);
275305
}
306+
cleanup(true);
276307
},
277308
"Temp Location Manager Cleanup");
278309
Runtime.getRuntime().addShutdownHook(selfCleanup);
@@ -347,6 +378,19 @@ boolean cleanup(boolean cleanSelf) {
347378
*/
348379
boolean cleanup(boolean cleanSelf, long timeout, TimeUnit unit) {
349380
try {
381+
if (!Files.exists(baseTempDir)) {
382+
// not event the main temp location exists; nothing to clean up
383+
return true;
384+
}
385+
try (Stream<Path> paths = Files.walk(baseTempDir)) {
386+
if (paths.noneMatch(
387+
path ->
388+
Files.isDirectory(path)
389+
&& path.getFileName().toString().startsWith(TEMPDIR_PREFIX))) {
390+
// nothing to clean up; bail out early
391+
return true;
392+
}
393+
}
350394
cleanupTestHook.onCleanupStart(cleanSelf, timeout, unit);
351395
CleanupVisitor visitor = new CleanupVisitor(cleanSelf, timeout, unit);
352396
Files.walkFileTree(baseTempDir, visitor);
@@ -362,21 +406,24 @@ boolean cleanup(boolean cleanSelf, long timeout, TimeUnit unit) {
362406
}
363407

364408
// accessible for tests
365-
void waitForCleanup(long timeout, TimeUnit unit) throws TimeoutException {
409+
boolean waitForCleanup(long timeout, TimeUnit unit) {
366410
try {
367-
cleanupTask.get(timeout, unit);
411+
return cleanupTask.await(timeout, unit);
368412
} catch (InterruptedException e) {
369-
cleanupTask.cancel(true);
413+
log.debug("Temp directory cleanup was interrupted");
370414
Thread.currentThread().interrupt();
371-
} catch (TimeoutException e) {
372-
cleanupTask.cancel(true);
373-
throw e;
374-
} catch (ExecutionException e) {
415+
} catch (Throwable t) {
375416
if (log.isDebugEnabled()) {
376-
log.debug("Failed to cleanup temp directory: {}", tempDir, e);
417+
log.debug("Failed to cleanup temp directory: {}", tempDir, t);
377418
} else {
378419
log.debug("Failed to cleanup temp directory: {}", tempDir);
379420
}
380421
}
422+
return false;
423+
}
424+
425+
// accessible for tests
426+
void createDirStructure() throws IOException {
427+
Files.createDirectories(baseTempDir);
381428
}
382429
}

dd-java-agent/agent-profiling/profiling-controller/src/test/java/com/datadog/profiling/controller/TempLocationManagerTest.java

Lines changed: 44 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
import java.util.UUID;
1919
import java.util.concurrent.Phaser;
2020
import java.util.concurrent.TimeUnit;
21+
import java.util.concurrent.atomic.AtomicBoolean;
2122
import java.util.concurrent.locks.LockSupport;
2223
import java.util.stream.Stream;
2324
import org.junit.jupiter.api.Test;
@@ -173,7 +174,7 @@ public FileVisitResult postVisitDirectory(Path dir, IOException exc) throws IOEx
173174
}
174175
};
175176

176-
TempLocationManager mgr = instance(baseDir, blocker);
177+
TempLocationManager mgr = instance(baseDir, true, blocker);
177178

178179
// wait for the cleanup start
179180
phaser.arriveAndAwaitAdvance();
@@ -187,8 +188,9 @@ public FileVisitResult postVisitDirectory(Path dir, IOException exc) throws IOEx
187188

188189
@ParameterizedTest
189190
@MethodSource("timeoutTestArguments")
190-
void testCleanupWithTimeout(boolean selfCleanup, String section) throws Exception {
191-
long timeoutMs = 500;
191+
void testCleanupWithTimeout(boolean selfCleanup, boolean shouldSucceed, String section)
192+
throws Exception {
193+
long timeoutMs = 10;
192194
TempLocationManager.CleanupHook delayer =
193195
new TempLocationManager.CleanupHook() {
194196
@Override
@@ -229,30 +231,63 @@ public FileVisitResult visitFile(Path file, BasicFileAttributes attrs)
229231
Files.createTempDirectory(
230232
"ddprof-test-",
231233
PosixFilePermissions.asFileAttribute(PosixFilePermissions.fromString("rwx------")));
232-
TempLocationManager instance = instance(baseDir, delayer);
233-
boolean rslt = instance.cleanup(selfCleanup, timeoutMs, TimeUnit.MILLISECONDS);
234-
assertTrue(rslt);
234+
TempLocationManager instance = instance(baseDir, false, delayer);
235+
Path mytempdir = instance.getTempDir();
236+
Path otherTempdir = mytempdir.getParent().resolve("pid_0000");
237+
Files.createDirectories(otherTempdir);
238+
Files.createFile(mytempdir.resolve("dummy"));
239+
Files.createFile(otherTempdir.resolve("dummy"));
240+
boolean rslt =
241+
instance.cleanup(
242+
selfCleanup, (long) (timeoutMs * (shouldSucceed ? 10 : 0.5d)), TimeUnit.MILLISECONDS);
243+
assertEquals(shouldSucceed, rslt);
244+
}
245+
246+
@Test
247+
void testShortCircuit() throws Exception {
248+
Path baseDir =
249+
Files.createTempDirectory(
250+
"ddprof-test-",
251+
PosixFilePermissions.asFileAttribute(PosixFilePermissions.fromString("rwx------")));
252+
AtomicBoolean executed = new AtomicBoolean();
253+
TempLocationManager.CleanupHook hook =
254+
new TempLocationManager.CleanupHook() {
255+
@Override
256+
public void onCleanupStart(boolean selfCleanup, long timeout, TimeUnit unit) {
257+
executed.set(true);
258+
}
259+
};
260+
TempLocationManager instance = instance(baseDir, false, hook);
261+
262+
instance.createDirStructure();
263+
264+
boolean ret = instance.cleanup(false);
265+
assertTrue(ret);
266+
assertFalse(executed.get());
235267
}
236268

237269
private static Stream<Arguments> timeoutTestArguments() {
238270
List<Arguments> argumentsList = new ArrayList<>();
239271
for (boolean selfCleanup : new boolean[] {true, false}) {
240272
for (String intercepted :
241273
new String[] {"preVisitDirectory", "visitFile", "postVisitDirectory"}) {
242-
argumentsList.add(Arguments.of(selfCleanup, intercepted));
274+
argumentsList.add(Arguments.of(selfCleanup, true, intercepted));
275+
argumentsList.add(Arguments.of(selfCleanup, false, intercepted));
243276
}
244277
}
245278
return argumentsList.stream();
246279
}
247280

248-
private TempLocationManager instance(Path baseDir, TempLocationManager.CleanupHook cleanupHook)
281+
private TempLocationManager instance(
282+
Path baseDir, boolean withStartupCleanup, TempLocationManager.CleanupHook cleanupHook)
249283
throws IOException {
250284
Properties props = new Properties();
251285
props.put(ProfilingConfig.PROFILING_TEMP_DIR, baseDir.toString());
252286
props.put(
253287
ProfilingConfig.PROFILING_UPLOAD_PERIOD,
254288
"0"); // to force immediate cleanup; must be a string value!
255289

256-
return new TempLocationManager(ConfigProvider.withPropertiesOverride(props), cleanupHook);
290+
return new TempLocationManager(
291+
ConfigProvider.withPropertiesOverride(props), withStartupCleanup, cleanupHook);
257292
}
258293
}

0 commit comments

Comments
 (0)