Skip to content

Commit 7ff02ca

Browse files
[test optimization] Fix EFD retries in jest (#7637)
1 parent 46a09e4 commit 7ff02ca

10 files changed

Lines changed: 296 additions & 14 deletions

File tree

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
'use strict'
2+
3+
const assert = require('assert')
4+
5+
describe('efd slow retries', () => {
6+
it('instant test', () => {
7+
assert.strictEqual(1 + 1, 2)
8+
})
9+
})
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
'use strict'
2+
3+
const assert = require('assert')
4+
5+
describe('efd slow retries', () => {
6+
it('slightly slow test', async () => {
7+
await new Promise(resolve => setTimeout(resolve, 5100))
8+
assert.strictEqual(1 + 1, 2)
9+
})
10+
})

integration-tests/jest/jest.spec.js

Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3776,6 +3776,135 @@ describe(`jest@${JEST_VERSION} commonJS`, () => {
37763776
const [[exitCode]] = await Promise.all([once(childProcess, 'exit'), testAssertionsPromise])
37773777
assert.strictEqual(exitCode, 1, 'exit code 1 when suite fails (resolution error, EFD)')
37783778
})
3779+
3780+
it('retries a fast new test using the count from the matching slow_test_retries bucket', async () => {
3781+
receiver.setInfoResponse({ endpoints: ['/evp_proxy/v4'] })
3782+
receiver.setKnownTests({ jest: {} })
3783+
receiver.setSettings({
3784+
early_flake_detection: {
3785+
enabled: true,
3786+
slow_test_retries: {
3787+
'5s': 2,
3788+
'10s': 1,
3789+
},
3790+
faulty_session_threshold: 100,
3791+
},
3792+
known_tests_enabled: true,
3793+
})
3794+
3795+
const eventsPromise = receiver
3796+
.gatherPayloadsMaxTimeout(({ url }) => url.endsWith('/api/v2/citestcycle'), (payloads) => {
3797+
const events = payloads.flatMap(({ payload }) => payload.events)
3798+
const tests = events.filter(event => event.type === 'test').map(event => event.content)
3799+
const testEvents = tests.filter(t => t.resource?.includes('instant-test'))
3800+
// 1 original + 2 retries from the '5s' bucket (fast test < 5 s)
3801+
assert.strictEqual(testEvents.length, 3)
3802+
const efdRetries = testEvents.filter(t =>
3803+
t.meta[TEST_IS_RETRY] === 'true' && t.meta[TEST_RETRY_REASON] === TEST_RETRY_REASON_TYPES.efd
3804+
)
3805+
assert.strictEqual(efdRetries.length, 2)
3806+
testEvents.forEach(t => assert.strictEqual(t.meta[TEST_IS_NEW], 'true'))
3807+
})
3808+
3809+
childProcess = exec(
3810+
runTestsCommand,
3811+
{
3812+
cwd,
3813+
env: {
3814+
...getCiVisAgentlessConfig(receiver.port),
3815+
TESTS_TO_RUN: 'test-early-flake-detection/instant-test',
3816+
},
3817+
}
3818+
)
3819+
3820+
await Promise.all([once(childProcess, 'exit'), eventsPromise])
3821+
})
3822+
3823+
it('retries a slightly slow new test using the count from the matching slow_test_retries bucket', async () => {
3824+
receiver.setInfoResponse({ endpoints: ['/evp_proxy/v4'] })
3825+
receiver.setKnownTests({ jest: {} })
3826+
receiver.setSettings({
3827+
early_flake_detection: {
3828+
enabled: true,
3829+
slow_test_retries: {
3830+
'5s': 3,
3831+
'10s': 1,
3832+
},
3833+
faulty_session_threshold: 100,
3834+
},
3835+
known_tests_enabled: true,
3836+
})
3837+
3838+
const eventsPromise = receiver
3839+
// test runs for ~5100 ms × 2 executions; allow extra time for jest startup + reporting
3840+
.gatherPayloadsMaxTimeout(({ url }) => url.endsWith('/api/v2/citestcycle'), (payloads) => {
3841+
const events = payloads.flatMap(({ payload }) => payload.events)
3842+
const tests = events.filter(event => event.type === 'test').map(event => event.content)
3843+
const testEvents = tests.filter(t => t.resource?.includes('slightly-slow-test'))
3844+
// 1 original + 1 retry from the '10s' bucket (test takes ~5100 ms, between 5 s and 10 s)
3845+
assert.strictEqual(testEvents.length, 2)
3846+
const efdRetries = testEvents.filter(t =>
3847+
t.meta[TEST_IS_RETRY] === 'true' && t.meta[TEST_RETRY_REASON] === TEST_RETRY_REASON_TYPES.efd
3848+
)
3849+
assert.strictEqual(efdRetries.length, 1)
3850+
testEvents.forEach(t => assert.strictEqual(t.meta[TEST_IS_NEW], 'true'))
3851+
}, 25_000)
3852+
3853+
childProcess = exec(
3854+
runTestsCommand,
3855+
{
3856+
cwd,
3857+
env: {
3858+
...getCiVisAgentlessConfig(receiver.port),
3859+
TESTS_TO_RUN: 'test-early-flake-detection/slightly-slow-test',
3860+
},
3861+
}
3862+
)
3863+
3864+
await Promise.all([once(childProcess, 'exit'), eventsPromise])
3865+
})
3866+
3867+
it('aborts retries and tags the test when the test is too slow for any slow_test_retries bucket', async () => {
3868+
receiver.setInfoResponse({ endpoints: ['/evp_proxy/v4'] })
3869+
receiver.setKnownTests({ jest: {} })
3870+
receiver.setSettings({
3871+
early_flake_detection: {
3872+
enabled: true,
3873+
slow_test_retries: {
3874+
'5s': 3,
3875+
'10s': 0,
3876+
},
3877+
faulty_session_threshold: 100,
3878+
},
3879+
known_tests_enabled: true,
3880+
})
3881+
3882+
const eventsPromise = receiver
3883+
.gatherPayloadsMaxTimeout(({ url }) => url.endsWith('/api/v2/citestcycle'), (payloads) => {
3884+
const events = payloads.flatMap(({ payload }) => payload.events)
3885+
const tests = events.filter(event => event.type === 'test').map(event => event.content)
3886+
const testEvents = tests.filter(t => t.resource?.includes('slightly-slow-test'))
3887+
// 0 retries — bucket value is 0
3888+
assert.strictEqual(testEvents.length, 1)
3889+
const [testEvent] = testEvents
3890+
assert.strictEqual(testEvent.meta[TEST_IS_NEW], 'true')
3891+
assert.strictEqual(testEvent.meta[TEST_EARLY_FLAKE_ABORT_REASON], 'slow')
3892+
assert.ok(!(TEST_IS_RETRY in testEvent.meta), 'should not be retried')
3893+
}, 20_000)
3894+
3895+
childProcess = exec(
3896+
runTestsCommand,
3897+
{
3898+
cwd,
3899+
env: {
3900+
...getCiVisAgentlessConfig(receiver.port),
3901+
TESTS_TO_RUN: 'test-early-flake-detection/slightly-slow-test',
3902+
},
3903+
}
3904+
)
3905+
3906+
await Promise.all([once(childProcess, 'exit'), eventsPromise])
3907+
})
37793908
})
37803909

37813910
context('flaky test retries', () => {

packages/datadog-instrumentations/src/jest.js

Lines changed: 69 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ const {
2121
getFormattedJestTestParameters,
2222
getJestTestName,
2323
getJestSuitesToRun,
24+
getEfdRetryCount,
2425
} = require('../../datadog-plugin-jest/src/util')
2526
const { addHook, channel } = require('./helpers/instrument')
2627

@@ -76,6 +77,7 @@ let hasUnskippableSuites = false
7677
let hasForcedToRunSuites = false
7778
let isEarlyFlakeDetectionEnabled = false
7879
let earlyFlakeDetectionNumRetries = 0
80+
let earlyFlakeDetectionSlowTestRetries = {}
7981
let earlyFlakeDetectionFaultyThreshold = 30
8082
let isEarlyFlakeDetectionFaulty = false
8183
let hasFilteredSkippableSuites = false
@@ -95,6 +97,12 @@ const attemptToFixRetriedTestsStatuses = new Map()
9597
const wrappedWorkers = new WeakSet()
9698
const testSuiteMockedFiles = new Map()
9799
const testsToBeRetried = new Set()
100+
// Per-test: how many EFD retries were determined after the first execution.
101+
const efdDeterminedRetries = new Map()
102+
// Tests whose first-run duration mapped to an EFD retry count of 0 — retries are aborted and the test is tagged "slow".
103+
const efdSlowAbortedTests = new Set()
104+
// Tests added as EFD new-test candidates (not ATF, not impacted).
105+
const efdNewTestCandidates = new Set()
98106
const testSuiteAbsolutePathsWithFastCheck = new Set()
99107
const testSuiteJestObjects = new Map()
100108

@@ -197,7 +205,7 @@ function getWrappedEnvironment (BaseEnvironment, jestVersion) {
197205
this.isImpactedTestsEnabled = this.testEnvironmentOptions._ddIsImpactedTestsEnabled
198206

199207
if (this.isKnownTestsEnabled) {
200-
earlyFlakeDetectionNumRetries = this.testEnvironmentOptions._ddEarlyFlakeDetectionNumRetries
208+
earlyFlakeDetectionSlowTestRetries = this.testEnvironmentOptions._ddEarlyFlakeDetectionSlowTestRetries ?? {}
201209
try {
202210
this.knownTestsForThisSuite = this.getKnownTestsForSuite(this.testEnvironmentOptions._ddKnownTests)
203211

@@ -543,11 +551,9 @@ function getWrappedEnvironment (BaseEnvironment, jestVersion) {
543551
retriedTestsToNumAttempts.set(testFullName, 0)
544552
if (this.isEarlyFlakeDetectionEnabled) {
545553
testsToBeRetried.add(testFullName)
546-
this.retryTest({
547-
jestEvent: event,
548-
retryCount: earlyFlakeDetectionNumRetries,
549-
retryType: 'Early flake detection',
550-
})
554+
efdNewTestCandidates.add(testFullName)
555+
// Cloning is deferred to test_done after the first execution,
556+
// when we know the duration and can choose the right retry count.
551557
}
552558
}
553559
}
@@ -572,8 +578,8 @@ function getWrappedEnvironment (BaseEnvironment, jestVersion) {
572578
let attemptToFixFailed = false
573579
let failedAllTests = false
574580
let isAttemptToFix = false
581+
const testName = getJestTestName(event.test, this.getShouldStripSeedFromTestName())
575582
if (this.isTestManagementTestsEnabled) {
576-
const testName = getJestTestName(event.test, this.getShouldStripSeedFromTestName())
577583
isAttemptToFix = this.testManagementTestsForThisSuite?.attemptToFix?.includes(testName)
578584
if (isAttemptToFix) {
579585
if (attemptToFixRetriedTestsStatuses.has(testName)) {
@@ -598,9 +604,53 @@ function getWrappedEnvironment (BaseEnvironment, jestVersion) {
598604
}
599605
}
600606

607+
// EFD dynamic cloning: on first execution of a new EFD candidate,
608+
// determine the retry count from the test's duration.
609+
if (
610+
this.isEarlyFlakeDetectionEnabled &&
611+
this.isKnownTestsEnabled &&
612+
efdNewTestCandidates.has(testName) &&
613+
event.test.invocations === 1 &&
614+
!efdDeterminedRetries.has(testName)
615+
) {
616+
const durationMs = event.test.duration ?? 0
617+
const retryCount = getEfdRetryCount(durationMs, earlyFlakeDetectionSlowTestRetries)
618+
efdDeterminedRetries.set(testName, retryCount)
619+
if (retryCount > 0) {
620+
// Temporarily adjust jest-circus state so that retry tests are registered
621+
// into the correct describe block and bypass the "tests have started" guard.
622+
//
623+
// Problem 1 (jest-circus ≤24): currentDescribeBlock points to ROOT during
624+
// execution, and ROOT's tests loop already finished before children ran.
625+
//
626+
// Problem 2 (jest-circus ≥27): `hasStarted = true` causes `test()` to throw
627+
// "Cannot add a test after tests have started running".
628+
//
629+
// Fix: temporarily point currentDescribeBlock to the test's parent (so retries
630+
// land in the still-iterating children array) and set hasStarted = false (so the
631+
// guard is bypassed). Both are restored immediately after scheduling the retries.
632+
const originalDescribeBlock = state.currentDescribeBlock
633+
const originalHasStarted = state.hasStarted
634+
state.currentDescribeBlock = event.test.parent ?? originalDescribeBlock
635+
state.hasStarted = false
636+
this.retryTest({
637+
jestEvent: {
638+
testName: event.test.name,
639+
fn: event.test.fn,
640+
timeout: event.test.timeout,
641+
},
642+
retryCount,
643+
retryType: 'Early flake detection',
644+
})
645+
state.currentDescribeBlock = originalDescribeBlock
646+
state.hasStarted = originalHasStarted
647+
} else {
648+
efdSlowAbortedTests.add(testName)
649+
}
650+
}
651+
601652
let isEfdRetry = false
602653
// We'll store the test statuses of the retries
603-
const testName = getJestTestName(event.test, this.getShouldStripSeedFromTestName())
604654
if (this.isKnownTestsEnabled) {
605655
const isNewTest = retriedTestsToNumAttempts.has(testName)
606656
if (isNewTest) {
@@ -613,7 +663,8 @@ function getWrappedEnvironment (BaseEnvironment, jestVersion) {
613663
const testStatuses = newTestsTestStatuses.get(testName)
614664
// Check if this is the last EFD retry.
615665
// If it is, we'll set the failedAllTests flag to true if all the tests failed
616-
if (testStatuses.length === earlyFlakeDetectionNumRetries + 1 &&
666+
const efdRetryCount = efdDeterminedRetries.get(testName) ?? 0
667+
if (efdRetryCount > 0 && testStatuses.length === efdRetryCount + 1 &&
617668
testStatuses.every(status => status === 'fail')) {
618669
failedAllTests = true
619670
}
@@ -671,6 +722,7 @@ function getWrappedEnvironment (BaseEnvironment, jestVersion) {
671722
attemptToFixFailed,
672723
isAtrRetry,
673724
finalStatus,
725+
earlyFlakeAbortReason: efdSlowAbortedTests.has(testName) ? 'slow' : undefined,
674726
})
675727

676728
if (promises.isProbeReady) {
@@ -682,6 +734,9 @@ function getWrappedEnvironment (BaseEnvironment, jestVersion) {
682734
test.errors = errors
683735
}
684736
atrSuppressedErrors.clear()
737+
efdDeterminedRetries.clear()
738+
efdSlowAbortedTests.clear()
739+
efdNewTestCandidates.clear()
685740
}
686741
if (event.name === 'test_skip' || event.name === 'test_todo') {
687742
const testName = getJestTestName(event.test, this.getShouldStripSeedFromTestName())
@@ -702,7 +757,9 @@ function getWrappedEnvironment (BaseEnvironment, jestVersion) {
702757
getEfdResult ({ testName, isNewTest, isModifiedTest, isEfdRetry, numberOfExecutedRetries }) {
703758
const isEfdEnabled = this.isEarlyFlakeDetectionEnabled
704759
const isEfdActive = isEfdEnabled && (isNewTest || isModifiedTest)
705-
const isLastEfdRetry = isEfdRetry && numberOfExecutedRetries >= (earlyFlakeDetectionNumRetries + 1)
760+
const retryCount = efdDeterminedRetries.get(testName) ?? 0
761+
const isSlowAbort = efdSlowAbortedTests.has(testName)
762+
const isLastEfdRetry = (isEfdRetry && numberOfExecutedRetries >= (retryCount + 1)) || isSlowAbort
706763
const isFinalEfdTestExecution = isEfdActive && isLastEfdRetry
707764

708765
let finalStatus
@@ -939,6 +996,7 @@ function getCliWrapper (isNewJestVersion) {
939996
isSuitesSkippingEnabled = libraryConfig.isSuitesSkippingEnabled
940997
isEarlyFlakeDetectionEnabled = libraryConfig.isEarlyFlakeDetectionEnabled
941998
earlyFlakeDetectionNumRetries = libraryConfig.earlyFlakeDetectionNumRetries
999+
earlyFlakeDetectionSlowTestRetries = libraryConfig.earlyFlakeDetectionSlowTestRetries ?? {}
9421000
earlyFlakeDetectionFaultyThreshold = libraryConfig.earlyFlakeDetectionFaultyThreshold
9431001
isKnownTestsEnabled = libraryConfig.isKnownTestsEnabled
9441002
isTestManagementTestsEnabled = libraryConfig.isTestManagementEnabled
@@ -1514,7 +1572,7 @@ addHook({
15141572
_ddItrCorrelationId,
15151573
_ddKnownTests,
15161574
_ddIsEarlyFlakeDetectionEnabled,
1517-
_ddEarlyFlakeDetectionNumRetries,
1575+
_ddEarlyFlakeDetectionSlowTestRetries,
15181576
_ddRepositoryRoot,
15191577
_ddIsFlakyTestRetriesEnabled,
15201578
_ddFlakyTestRetriesCount,

packages/datadog-plugin-jest/src/index.js

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -185,7 +185,7 @@ class JestPlugin extends CiPlugin {
185185
config._ddRequestErrorTags = this.getSessionRequestErrorTags()
186186
config._ddItrCorrelationId = this.itrCorrelationId
187187
config._ddIsEarlyFlakeDetectionEnabled = !!this.libraryConfig?.isEarlyFlakeDetectionEnabled
188-
config._ddEarlyFlakeDetectionNumRetries = this.libraryConfig?.earlyFlakeDetectionNumRetries ?? 0
188+
config._ddEarlyFlakeDetectionSlowTestRetries = this.libraryConfig?.earlyFlakeDetectionSlowTestRetries ?? {}
189189
config._ddRepositoryRoot = this.repositoryRoot
190190
config._ddIsFlakyTestRetriesEnabled = this.libraryConfig?.isFlakyTestRetriesEnabled ?? false
191191
config._ddIsTestManagementTestsEnabled = this.libraryConfig?.isTestManagementEnabled ?? false
@@ -395,6 +395,7 @@ class JestPlugin extends CiPlugin {
395395
attemptToFixFailed,
396396
isAtrRetry,
397397
finalStatus,
398+
earlyFlakeAbortReason,
398399
}) => {
399400
span.setTag(TEST_STATUS, status)
400401
if (finalStatus) {
@@ -415,6 +416,9 @@ class JestPlugin extends CiPlugin {
415416
span.setTag(TEST_IS_RETRY, 'true')
416417
span.setTag(TEST_RETRY_REASON, TEST_RETRY_REASON_TYPES.atr)
417418
}
419+
if (earlyFlakeAbortReason) {
420+
span.setTag(TEST_EARLY_FLAKE_ABORT_REASON, earlyFlakeAbortReason)
421+
}
418422

419423
this.telemetry.ciVisEvent(
420424
TELEMETRY_EVENT_FINISHED,

packages/datadog-plugin-jest/src/util.js

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
const { readFileSync } = require('fs')
44
const { parse } = require('../../../vendor/dist/jest-docblock')
55

6-
const { getTestSuitePath } = require('../../dd-trace/src/plugins/util/test')
6+
const { getTestSuitePath, getEfdRetryCount } = require('../../dd-trace/src/plugins/util/test')
77
const log = require('../../dd-trace/src/log')
88

99
/**
@@ -172,4 +172,5 @@ module.exports = {
172172
getJestTestName,
173173
getJestSuitesToRun,
174174
isMarkedAsUnskippable,
175+
getEfdRetryCount,
175176
}

0 commit comments

Comments
 (0)