From 75156f627459e013f6d776037e7ce7e51864bcb4 Mon Sep 17 00:00:00 2001 From: jean-philippe bempel Date: Mon, 6 Jan 2025 11:15:57 +0100 Subject: [PATCH] Add mechanism to remove exception probes Add circuit breaker when evaluating exception probe to detect when an exception probe is executed in a hot path only waiting for exception to happen. when circuit breaker is open after 1000 calls in 10 seconds window, we check if there is no exception captured during this period too and remove the probe from the configuration and un-instrument it. This will prevent too much overhead for hot path --- .../exception/DefaultExceptionDebugger.java | 10 ++- .../exception/ExceptionProbeManager.java | 31 ++++++- .../debugger/probe/ExceptionProbe.java | 19 ++++- .../exception/ExceptionProbeManagerTest.java | 81 ++++++++++++------- 4 files changed, 106 insertions(+), 35 deletions(-) diff --git a/dd-java-agent/agent-debugger/src/main/java/com/datadog/debugger/exception/DefaultExceptionDebugger.java b/dd-java-agent/agent-debugger/src/main/java/com/datadog/debugger/exception/DefaultExceptionDebugger.java index 3c3bb225ca2..a5d8b21b33a 100644 --- a/dd-java-agent/agent-debugger/src/main/java/com/datadog/debugger/exception/DefaultExceptionDebugger.java +++ b/dd-java-agent/agent-debugger/src/main/java/com/datadog/debugger/exception/DefaultExceptionDebugger.java @@ -57,6 +57,7 @@ public DefaultExceptionDebugger( ClassNameFilter classNameFiltering, int maxExceptionPerSecond) { this.exceptionProbeManager = exceptionProbeManager; + this.exceptionProbeManager.setDefaultExceptionDebugger(this); this.configurationUpdater = configurationUpdater; this.classNameFiltering = classNameFiltering; this.circuitBreaker = new CircuitBreaker(maxExceptionPerSecond, Duration.ofSeconds(1)); @@ -102,7 +103,11 @@ public void handleException(Throwable t, AgentSpan span) { exceptionProbeManager.createProbesForException( throwable.getStackTrace(), chainedExceptionIdx); if (creationResult.probesCreated > 0) { - AgentTaskScheduler.INSTANCE.execute(() -> applyExceptionConfiguration(fingerprint)); + AgentTaskScheduler.INSTANCE.execute( + () -> { + applyExceptionConfiguration(); + exceptionProbeManager.addFingerprint(fingerprint); + }); break; } else { if (LOGGER.isDebugEnabled()) { @@ -118,9 +123,8 @@ public void handleException(Throwable t, AgentSpan span) { } } - private void applyExceptionConfiguration(String fingerprint) { + void applyExceptionConfiguration() { configurationUpdater.accept(EXCEPTION, exceptionProbeManager.getProbes()); - exceptionProbeManager.addFingerprint(fingerprint); } private static void processSnapshotsAndSetTags( diff --git a/dd-java-agent/agent-debugger/src/main/java/com/datadog/debugger/exception/ExceptionProbeManager.java b/dd-java-agent/agent-debugger/src/main/java/com/datadog/debugger/exception/ExceptionProbeManager.java index 2eafd524318..0b422688672 100644 --- a/dd-java-agent/agent-debugger/src/main/java/com/datadog/debugger/exception/ExceptionProbeManager.java +++ b/dd-java-agent/agent-debugger/src/main/java/com/datadog/debugger/exception/ExceptionProbeManager.java @@ -8,6 +8,7 @@ import datadog.trace.api.Config; import datadog.trace.bootstrap.debugger.DebuggerContext.ClassNameFilter; import datadog.trace.bootstrap.debugger.ProbeId; +import datadog.trace.util.AgentTaskScheduler; import java.time.Clock; import java.time.Duration; import java.time.Instant; @@ -24,6 +25,7 @@ /** Manages the probes used for instrumentation of exception stacktraces. */ public class ExceptionProbeManager { + public static final int HOT_LOOP_TIME_WINDOW_IN_SECONDS = 10; private static final Logger LOGGER = LoggerFactory.getLogger(ExceptionProbeManager.class); private final Map fingerprints = new ConcurrentHashMap<>(); @@ -35,6 +37,7 @@ public class ExceptionProbeManager { private final long captureIntervalS; private final Clock clock; private final int maxCapturedFrames; + private DefaultExceptionDebugger defaultExceptionDebugger; public ExceptionProbeManager(ClassNameFilter classNameFiltering, Duration captureInterval) { this( @@ -67,6 +70,19 @@ public ClassNameFilter getClassNameFilter() { return classNameFiltering; } + public void removeProbe(ExceptionProbe exceptionProbe) { + probes.remove(exceptionProbe.getId()); + if (defaultExceptionDebugger != null) { + AgentTaskScheduler.INSTANCE.execute( + () -> defaultExceptionDebugger.applyExceptionConfiguration()); + } + } + + public boolean shouldRemoveProbe(Instant lastCaptureTime) { + return ChronoUnit.SECONDS.between(lastCaptureTime, Instant.now(clock)) + >= HOT_LOOP_TIME_WINDOW_IN_SECONDS; + } + static class CreationResult { final int probesCreated; final int thirdPartyFrames; @@ -139,18 +155,21 @@ public Map getFingerprints() { return fingerprints; } - public boolean shouldCaptureException(String fingerprint) { - return shouldCaptureException(fingerprint, clock); + public boolean shouldCaptureException(Instant lastCapture) { + return shouldCaptureException(lastCapture, clock); } - boolean shouldCaptureException(String fingerprint, Clock clock) { - Instant lastCapture = fingerprints.get(fingerprint); + boolean shouldCaptureException(Instant lastCapture, Clock clock) { if (lastCapture == null) { return false; } return ChronoUnit.SECONDS.between(lastCapture, Instant.now(clock)) >= captureIntervalS; } + public Instant getLastCapture(String fingerprint) { + return fingerprints.get(fingerprint); + } + public void addSnapshot(Snapshot snapshot) { Throwable throwable = snapshot.getCaptures().getReturn().getCapturedThrowable().getThrowable(); if (throwable == null) { @@ -213,4 +232,8 @@ public void addSnapshot(Snapshot snapshot) { snapshots.add(snapshot); } } + + void setDefaultExceptionDebugger(DefaultExceptionDebugger defaultExceptionDebugger) { + this.defaultExceptionDebugger = defaultExceptionDebugger; + } } diff --git a/dd-java-agent/agent-debugger/src/main/java/com/datadog/debugger/probe/ExceptionProbe.java b/dd-java-agent/agent-debugger/src/main/java/com/datadog/debugger/probe/ExceptionProbe.java index c727bece3a1..e051f170c39 100644 --- a/dd-java-agent/agent-debugger/src/main/java/com/datadog/debugger/probe/ExceptionProbe.java +++ b/dd-java-agent/agent-debugger/src/main/java/com/datadog/debugger/probe/ExceptionProbe.java @@ -1,5 +1,6 @@ package com.datadog.debugger.probe; +import static com.datadog.debugger.exception.ExceptionProbeManager.HOT_LOOP_TIME_WINDOW_IN_SECONDS; import static com.datadog.debugger.util.ExceptionHelper.getInnerMostThrowable; import static java.util.Collections.emptyList; @@ -8,12 +9,15 @@ import com.datadog.debugger.exception.Fingerprinter; import com.datadog.debugger.instrumentation.InstrumentationResult; import com.datadog.debugger.sink.Snapshot; +import com.datadog.debugger.util.CircuitBreaker; import datadog.trace.bootstrap.debugger.CapturedContext; import datadog.trace.bootstrap.debugger.MethodLocation; import datadog.trace.bootstrap.debugger.ProbeId; import datadog.trace.bootstrap.debugger.ProbeImplementation; import datadog.trace.bootstrap.debugger.ProbeLocation; import datadog.trace.bootstrap.debugger.el.ValueReferences; +import java.time.Duration; +import java.time.Instant; import java.util.List; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -22,6 +26,9 @@ public class ExceptionProbe extends LogProbe implements ForceMethodInstrumentati private static final Logger LOGGER = LoggerFactory.getLogger(ExceptionProbe.class); private final transient ExceptionProbeManager exceptionProbeManager; private final transient int chainedExceptionIdx; + private final transient CircuitBreaker circuitBreaker = + new CircuitBreaker(1000, Duration.ofSeconds(HOT_LOOP_TIME_WINDOW_IN_SECONDS)); + private Instant lastCaptureTime = Instant.MIN; public ExceptionProbe( ProbeId probeId, @@ -61,6 +68,14 @@ public CapturedContext.Status createStatus() { @Override public void evaluate( CapturedContext context, CapturedContext.Status status, MethodLocation methodLocation) { + if (!circuitBreaker.trip()) { + if (exceptionProbeManager.shouldRemoveProbe(lastCaptureTime)) { + // hot loop without exception captured => uninstrument this probe + LOGGER.debug("Removing probe {}", id); + exceptionProbeManager.removeProbe(this); + return; + } + } if (!(status instanceof ExceptionProbeStatus)) { throw new IllegalStateException("Invalid status: " + status.getClass()); } @@ -78,7 +93,9 @@ public void evaluate( Throwable innerMostThrowable = getInnerMostThrowable(throwable); String fingerprint = Fingerprinter.fingerprint(innerMostThrowable, exceptionProbeManager.getClassNameFilter()); - if (exceptionProbeManager.shouldCaptureException(fingerprint)) { + Instant lastCapture = exceptionProbeManager.getLastCapture(fingerprint); + if (exceptionProbeManager.shouldCaptureException(lastCapture)) { + LOGGER.debug("Capturing exception matching fingerprint: {}", fingerprint); // capture only on uncaught exception matching the fingerprint ExceptionProbeStatus exceptionStatus = (ExceptionProbeStatus) status; diff --git a/dd-java-agent/agent-debugger/src/test/java/com/datadog/debugger/exception/ExceptionProbeManagerTest.java b/dd-java-agent/agent-debugger/src/test/java/com/datadog/debugger/exception/ExceptionProbeManagerTest.java index bafcd3fb0fa..df54898325a 100644 --- a/dd-java-agent/agent-debugger/src/test/java/com/datadog/debugger/exception/ExceptionProbeManagerTest.java +++ b/dd-java-agent/agent-debugger/src/test/java/com/datadog/debugger/exception/ExceptionProbeManagerTest.java @@ -1,23 +1,42 @@ package com.datadog.debugger.exception; +import static com.datadog.debugger.util.TestHelper.assertWithTimeout; +import static java.util.Arrays.asList; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.mockito.Mockito.doAnswer; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; import com.datadog.debugger.probe.ExceptionProbe; import com.datadog.debugger.util.ClassNameFiltering; import datadog.trace.api.Config; +import datadog.trace.bootstrap.debugger.CapturedContext; +import datadog.trace.bootstrap.debugger.MethodLocation; import java.time.Clock; import java.time.Duration; import java.time.Instant; import java.util.Collections; +import java.util.HashSet; +import java.util.Set; +import java.util.concurrent.atomic.AtomicBoolean; import java.util.stream.Collectors; import java.util.stream.Stream; +import org.jetbrains.annotations.NotNull; import org.junit.jupiter.api.Test; class ExceptionProbeManagerTest { + private static final @NotNull Set FILTERED_PACKAGES = + new HashSet<>( + asList( + "java.", + "jdk.", + "sun.", + "com.sun.", + "org.gradle.", + "worker.org.gradle.", + "org.junit.")); private final RuntimeException exception = new RuntimeException("test"); @Test @@ -25,28 +44,17 @@ public void instrumentStackTrace() { ClassNameFiltering classNameFiltering = ClassNameFiltering.allowAll(); ExceptionProbeManager exceptionProbeManager = new ExceptionProbeManager(classNameFiltering); RuntimeException exception = new RuntimeException("test"); - String fingerprint = Fingerprinter.fingerprint(exception, classNameFiltering); exceptionProbeManager.createProbesForException(exception.getStackTrace(), 0); assertFalse(exceptionProbeManager.getProbes().isEmpty()); } @Test void instrumentSingleFrame() { - ClassNameFiltering classNameFiltering = - new ClassNameFiltering( - Stream.of( - "java.", - "jdk.", - "sun.", - "com.sun.", - "org.gradle.", - "worker.org.gradle.", - "org.junit.") - .collect(Collectors.toSet())); + ClassNameFiltering classNameFiltering = new ClassNameFiltering(FILTERED_PACKAGES); ExceptionProbeManager exceptionProbeManager = new ExceptionProbeManager(classNameFiltering); String fingerprint = Fingerprinter.fingerprint(exception, classNameFiltering); - assertEquals("4974b2b4853e6152d8f218fb79a42a761a45335447e22e53d75f5325e742655", fingerprint); + assertEquals("44cbdcfe32eea21c3523e4a5885daabf5b8c9428cc0f613be5ff29663465ff9", fingerprint); exceptionProbeManager.createProbesForException(exception.getStackTrace(), 0); assertEquals(1, exceptionProbeManager.getProbes().size()); ExceptionProbe exceptionProbe = exceptionProbeManager.getProbes().iterator().next(); @@ -82,34 +90,53 @@ void lastCapture() { RuntimeException exception = new RuntimeException("test"); String fingerprint = Fingerprinter.fingerprint(exception, classNameFiltering); exceptionProbeManager.addFingerprint(fingerprint); - assertTrue(exceptionProbeManager.shouldCaptureException(fingerprint)); + Instant lastCapture = exceptionProbeManager.getLastCapture(fingerprint); + assertTrue(exceptionProbeManager.shouldCaptureException(lastCapture)); exceptionProbeManager.updateLastCapture(fingerprint); - assertFalse(exceptionProbeManager.shouldCaptureException(fingerprint)); + lastCapture = exceptionProbeManager.getLastCapture(fingerprint); + assertFalse(exceptionProbeManager.shouldCaptureException(lastCapture)); Clock clock = Clock.fixed(Instant.now().plus(Duration.ofMinutes(61)), Clock.systemUTC().getZone()); - assertTrue(exceptionProbeManager.shouldCaptureException(fingerprint, clock)); + lastCapture = exceptionProbeManager.getLastCapture(fingerprint); + assertTrue(exceptionProbeManager.shouldCaptureException(lastCapture, clock)); } @Test void maxFrames() { RuntimeException deepException = level1(); - ClassNameFiltering classNameFiltering = - new ClassNameFiltering( - Stream.of( - "java.", - "jdk.", - "sun.", - "com.sun.", - "org.gradle.", - "worker.org.gradle.", - "org.junit.") - .collect(Collectors.toSet())); + ClassNameFiltering classNameFiltering = new ClassNameFiltering(FILTERED_PACKAGES); ExceptionProbeManager exceptionProbeManager = new ExceptionProbeManager(classNameFiltering, Duration.ofHours(1), Clock.systemUTC(), 3); exceptionProbeManager.createProbesForException(deepException.getStackTrace(), 0); assertEquals(3, exceptionProbeManager.getProbes().size()); } + @Test + public void removeExceptionProbeOnHotLoop() { + ClassNameFiltering classNameFiltering = new ClassNameFiltering(FILTERED_PACKAGES); + ExceptionProbeManager exceptionProbeManager = new ExceptionProbeManager(classNameFiltering); + AtomicBoolean configApplied = new AtomicBoolean(false); + DefaultExceptionDebugger defaultExceptionDebugger = mock(DefaultExceptionDebugger.class); + doAnswer( + invocationOnMock -> { + configApplied.set(true); + return null; + }) + .when(defaultExceptionDebugger) + .applyExceptionConfiguration(); + exceptionProbeManager.setDefaultExceptionDebugger(defaultExceptionDebugger); + exceptionProbeManager.createProbesForException(exception.getStackTrace(), 0); + assertEquals(1, exceptionProbeManager.getProbes().size()); + ExceptionProbe exceptionProbe = exceptionProbeManager.getProbes().iterator().next(); + // simulate a code hot loop + for (int i = 0; i < 2000; i++) { + CapturedContext context = mock(CapturedContext.class); + exceptionProbe.evaluate(context, exceptionProbe.createStatus(), MethodLocation.EXIT); + } + assertEquals(0, exceptionProbeManager.getProbes().size()); + assertWithTimeout(configApplied::get, Duration.ofSeconds(30)); + } + RuntimeException level1() { return level2(); }