Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ public DefaultExceptionDebugger(
ClassNameFilter classNameFiltering,
int maxExceptionPerSecond) {
this.exceptionProbeManager = exceptionProbeManager;
this.exceptionProbeManager.setDefaultExceptionDebugger(this);
this.configurationUpdater = configurationUpdater;
this.classNameFiltering = classNameFiltering;
this.circuitBreaker = new CircuitBreaker(maxExceptionPerSecond, Duration.ofSeconds(1));
Expand Down Expand Up @@ -102,7 +103,11 @@ public void handleException(Throwable t, AgentSpan span) {
exceptionProbeManager.createProbesForException(
throwable.getStackTrace(), chainedExceptionIdx);
if (creationResult.probesCreated > 0) {
AgentTaskScheduler.INSTANCE.execute(() -> applyExceptionConfiguration(fingerprint));
AgentTaskScheduler.INSTANCE.execute(
() -> {
applyExceptionConfiguration();
exceptionProbeManager.addFingerprint(fingerprint);
});
break;
} else {
if (LOGGER.isDebugEnabled()) {
Expand All @@ -118,9 +123,8 @@ public void handleException(Throwable t, AgentSpan span) {
}
}

private void applyExceptionConfiguration(String fingerprint) {
void applyExceptionConfiguration() {
configurationUpdater.accept(EXCEPTION, exceptionProbeManager.getProbes());
exceptionProbeManager.addFingerprint(fingerprint);
}

private static void processSnapshotsAndSetTags(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import datadog.trace.api.Config;
import datadog.trace.bootstrap.debugger.DebuggerContext.ClassNameFilter;
import datadog.trace.bootstrap.debugger.ProbeId;
import datadog.trace.util.AgentTaskScheduler;
import java.time.Clock;
import java.time.Duration;
import java.time.Instant;
Expand All @@ -24,6 +25,7 @@

/** Manages the probes used for instrumentation of exception stacktraces. */
public class ExceptionProbeManager {
public static final int HOT_LOOP_TIME_WINDOW_IN_SECONDS = 10;
private static final Logger LOGGER = LoggerFactory.getLogger(ExceptionProbeManager.class);

private final Map<String, Instant> fingerprints = new ConcurrentHashMap<>();
Expand All @@ -35,6 +37,7 @@ public class ExceptionProbeManager {
private final long captureIntervalS;
private final Clock clock;
private final int maxCapturedFrames;
private DefaultExceptionDebugger defaultExceptionDebugger;

public ExceptionProbeManager(ClassNameFilter classNameFiltering, Duration captureInterval) {
this(
Expand Down Expand Up @@ -67,6 +70,19 @@ public ClassNameFilter getClassNameFilter() {
return classNameFiltering;
}

public void removeProbe(ExceptionProbe exceptionProbe) {
probes.remove(exceptionProbe.getId());
if (defaultExceptionDebugger != null) {
AgentTaskScheduler.INSTANCE.execute(
() -> defaultExceptionDebugger.applyExceptionConfiguration());
}
}

public boolean shouldRemoveProbe(Instant lastCaptureTime) {
return ChronoUnit.SECONDS.between(lastCaptureTime, Instant.now(clock))
>= HOT_LOOP_TIME_WINDOW_IN_SECONDS;
}

static class CreationResult {
final int probesCreated;
final int thirdPartyFrames;
Expand Down Expand Up @@ -139,18 +155,21 @@ public Map<String, Instant> getFingerprints() {
return fingerprints;
}

public boolean shouldCaptureException(String fingerprint) {
return shouldCaptureException(fingerprint, clock);
public boolean shouldCaptureException(Instant lastCapture) {
return shouldCaptureException(lastCapture, clock);
}

boolean shouldCaptureException(String fingerprint, Clock clock) {
Instant lastCapture = fingerprints.get(fingerprint);
boolean shouldCaptureException(Instant lastCapture, Clock clock) {
if (lastCapture == null) {
return false;
}
return ChronoUnit.SECONDS.between(lastCapture, Instant.now(clock)) >= captureIntervalS;
}

public Instant getLastCapture(String fingerprint) {
return fingerprints.get(fingerprint);
}

public void addSnapshot(Snapshot snapshot) {
Throwable throwable = snapshot.getCaptures().getReturn().getCapturedThrowable().getThrowable();
if (throwable == null) {
Expand Down Expand Up @@ -213,4 +232,8 @@ public void addSnapshot(Snapshot snapshot) {
snapshots.add(snapshot);
}
}

void setDefaultExceptionDebugger(DefaultExceptionDebugger defaultExceptionDebugger) {
this.defaultExceptionDebugger = defaultExceptionDebugger;
}
}
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package com.datadog.debugger.probe;

import static com.datadog.debugger.exception.ExceptionProbeManager.HOT_LOOP_TIME_WINDOW_IN_SECONDS;
import static com.datadog.debugger.util.ExceptionHelper.getInnerMostThrowable;
import static java.util.Collections.emptyList;

Expand All @@ -8,12 +9,15 @@
import com.datadog.debugger.exception.Fingerprinter;
import com.datadog.debugger.instrumentation.InstrumentationResult;
import com.datadog.debugger.sink.Snapshot;
import com.datadog.debugger.util.CircuitBreaker;
import datadog.trace.bootstrap.debugger.CapturedContext;
import datadog.trace.bootstrap.debugger.MethodLocation;
import datadog.trace.bootstrap.debugger.ProbeId;
import datadog.trace.bootstrap.debugger.ProbeImplementation;
import datadog.trace.bootstrap.debugger.ProbeLocation;
import datadog.trace.bootstrap.debugger.el.ValueReferences;
import java.time.Duration;
import java.time.Instant;
import java.util.List;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
Expand All @@ -22,6 +26,9 @@ public class ExceptionProbe extends LogProbe implements ForceMethodInstrumentati
private static final Logger LOGGER = LoggerFactory.getLogger(ExceptionProbe.class);
private final transient ExceptionProbeManager exceptionProbeManager;
private final transient int chainedExceptionIdx;
private final transient CircuitBreaker circuitBreaker =
new CircuitBreaker(1000, Duration.ofSeconds(HOT_LOOP_TIME_WINDOW_IN_SECONDS));
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we want to make the 1000 configurable? currently any method that run more then 100 times a second will be filtered out.

How are we going to refine this value? what is the plan?

private Instant lastCaptureTime = Instant.MIN;

public ExceptionProbe(
ProbeId probeId,
Expand Down Expand Up @@ -61,6 +68,14 @@ public CapturedContext.Status createStatus() {
@Override
public void evaluate(
CapturedContext context, CapturedContext.Status status, MethodLocation methodLocation) {
if (!circuitBreaker.trip()) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should this run after the check methodLocation != MethodLocation.EXIT ? or maybe it is not a thing... but if it is - then this trip() is get called twice?

if (exceptionProbeManager.shouldRemoveProbe(lastCaptureTime)) {
// hot loop without exception captured => uninstrument this probe
LOGGER.debug("Removing probe {}", id);
exceptionProbeManager.removeProbe(this);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

do we want to report this?

  1. maybe this should be trigger telemetry log event so we know how many times this happen to us.
  2. we we do capture the exception. should we report on that user frame as "HOT STUFF" so the UI would be able to display a message for the user why this frame was not captured?

return;
}
}
if (!(status instanceof ExceptionProbeStatus)) {
throw new IllegalStateException("Invalid status: " + status.getClass());
}
Expand All @@ -78,7 +93,9 @@ public void evaluate(
Throwable innerMostThrowable = getInnerMostThrowable(throwable);
String fingerprint =
Fingerprinter.fingerprint(innerMostThrowable, exceptionProbeManager.getClassNameFilter());
if (exceptionProbeManager.shouldCaptureException(fingerprint)) {
Instant lastCapture = exceptionProbeManager.getLastCapture(fingerprint);
if (exceptionProbeManager.shouldCaptureException(lastCapture)) {

LOGGER.debug("Capturing exception matching fingerprint: {}", fingerprint);
// capture only on uncaught exception matching the fingerprint
ExceptionProbeStatus exceptionStatus = (ExceptionProbeStatus) status;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,52 +1,60 @@
package com.datadog.debugger.exception;

import static com.datadog.debugger.util.TestHelper.assertWithTimeout;
import static java.util.Arrays.asList;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.mockito.Mockito.doAnswer;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;

import com.datadog.debugger.probe.ExceptionProbe;
import com.datadog.debugger.util.ClassNameFiltering;
import datadog.trace.api.Config;
import datadog.trace.bootstrap.debugger.CapturedContext;
import datadog.trace.bootstrap.debugger.MethodLocation;
import java.time.Clock;
import java.time.Duration;
import java.time.Instant;
import java.util.Collections;
import java.util.HashSet;
import java.util.Set;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.jetbrains.annotations.NotNull;
import org.junit.jupiter.api.Test;

class ExceptionProbeManagerTest {
private static final @NotNull Set<String> FILTERED_PACKAGES =
new HashSet<>(
asList(
"java.",
"jdk.",
"sun.",
"com.sun.",
"org.gradle.",
"worker.org.gradle.",
"org.junit."));
private final RuntimeException exception = new RuntimeException("test");

@Test
public void instrumentStackTrace() {
ClassNameFiltering classNameFiltering = ClassNameFiltering.allowAll();
ExceptionProbeManager exceptionProbeManager = new ExceptionProbeManager(classNameFiltering);
RuntimeException exception = new RuntimeException("test");
String fingerprint = Fingerprinter.fingerprint(exception, classNameFiltering);
exceptionProbeManager.createProbesForException(exception.getStackTrace(), 0);
assertFalse(exceptionProbeManager.getProbes().isEmpty());
}

@Test
void instrumentSingleFrame() {
ClassNameFiltering classNameFiltering =
new ClassNameFiltering(
Stream.of(
"java.",
"jdk.",
"sun.",
"com.sun.",
"org.gradle.",
"worker.org.gradle.",
"org.junit.")
.collect(Collectors.toSet()));
ClassNameFiltering classNameFiltering = new ClassNameFiltering(FILTERED_PACKAGES);
ExceptionProbeManager exceptionProbeManager = new ExceptionProbeManager(classNameFiltering);

String fingerprint = Fingerprinter.fingerprint(exception, classNameFiltering);
assertEquals("4974b2b4853e6152d8f218fb79a42a761a45335447e22e53d75f5325e742655", fingerprint);
assertEquals("44cbdcfe32eea21c3523e4a5885daabf5b8c9428cc0f613be5ff29663465ff9", fingerprint);
exceptionProbeManager.createProbesForException(exception.getStackTrace(), 0);
assertEquals(1, exceptionProbeManager.getProbes().size());
ExceptionProbe exceptionProbe = exceptionProbeManager.getProbes().iterator().next();
Expand Down Expand Up @@ -82,34 +90,53 @@ void lastCapture() {
RuntimeException exception = new RuntimeException("test");
String fingerprint = Fingerprinter.fingerprint(exception, classNameFiltering);
exceptionProbeManager.addFingerprint(fingerprint);
assertTrue(exceptionProbeManager.shouldCaptureException(fingerprint));
Instant lastCapture = exceptionProbeManager.getLastCapture(fingerprint);
assertTrue(exceptionProbeManager.shouldCaptureException(lastCapture));
exceptionProbeManager.updateLastCapture(fingerprint);
assertFalse(exceptionProbeManager.shouldCaptureException(fingerprint));
lastCapture = exceptionProbeManager.getLastCapture(fingerprint);
assertFalse(exceptionProbeManager.shouldCaptureException(lastCapture));
Clock clock =
Clock.fixed(Instant.now().plus(Duration.ofMinutes(61)), Clock.systemUTC().getZone());
assertTrue(exceptionProbeManager.shouldCaptureException(fingerprint, clock));
lastCapture = exceptionProbeManager.getLastCapture(fingerprint);
assertTrue(exceptionProbeManager.shouldCaptureException(lastCapture, clock));
}

@Test
void maxFrames() {
RuntimeException deepException = level1();
ClassNameFiltering classNameFiltering =
new ClassNameFiltering(
Stream.of(
"java.",
"jdk.",
"sun.",
"com.sun.",
"org.gradle.",
"worker.org.gradle.",
"org.junit.")
.collect(Collectors.toSet()));
ClassNameFiltering classNameFiltering = new ClassNameFiltering(FILTERED_PACKAGES);
ExceptionProbeManager exceptionProbeManager =
new ExceptionProbeManager(classNameFiltering, Duration.ofHours(1), Clock.systemUTC(), 3);
exceptionProbeManager.createProbesForException(deepException.getStackTrace(), 0);
assertEquals(3, exceptionProbeManager.getProbes().size());
}

@Test
public void removeExceptionProbeOnHotLoop() {
ClassNameFiltering classNameFiltering = new ClassNameFiltering(FILTERED_PACKAGES);
ExceptionProbeManager exceptionProbeManager = new ExceptionProbeManager(classNameFiltering);
AtomicBoolean configApplied = new AtomicBoolean(false);
DefaultExceptionDebugger defaultExceptionDebugger = mock(DefaultExceptionDebugger.class);
doAnswer(
invocationOnMock -> {
configApplied.set(true);
return null;
})
.when(defaultExceptionDebugger)
.applyExceptionConfiguration();
exceptionProbeManager.setDefaultExceptionDebugger(defaultExceptionDebugger);
exceptionProbeManager.createProbesForException(exception.getStackTrace(), 0);
assertEquals(1, exceptionProbeManager.getProbes().size());
ExceptionProbe exceptionProbe = exceptionProbeManager.getProbes().iterator().next();
// simulate a code hot loop
for (int i = 0; i < 2000; i++) {
CapturedContext context = mock(CapturedContext.class);
exceptionProbe.evaluate(context, exceptionProbe.createStatus(), MethodLocation.EXIT);
}
assertEquals(0, exceptionProbeManager.getProbes().size());
assertWithTimeout(configApplied::get, Duration.ofSeconds(30));
}

RuntimeException level1() {
return level2();
}
Expand Down
Loading