Skip to content

Commit 06cc8ac

Browse files
authored
Fix profiler test flakiness (#5549)
1 parent 4686a4f commit 06cc8ac

2 files changed

Lines changed: 35 additions & 5 deletions

File tree

integration-tests/profiler/codehotspots.js

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,21 @@
11
'use strict'
22

33
const DDTrace = require('dd-trace')
4-
54
const tracer = DDTrace.init()
65

6+
// Busy cycle duration is communicated in nanoseconds through the environment
7+
// variable by the test. On first execution, it'll be 10 * the sampling period
8+
// at 99Hz (so, 101010101ns). If subsequent executions are needed, it will be
9+
// prolonged.
10+
const busyCycleTime = BigInt(process.env.BUSY_CYCLE_TIME)
11+
712
function busyLoop () {
813
const start = process.hrtime.bigint()
914
let x = 0
1015
for (;;) {
1116
const now = process.hrtime.bigint()
12-
// Busy cycle for 100ms
13-
if (now - start > 100000000n) {
17+
// Busy cycle
18+
if (now - start > busyCycleTime) {
1419
break
1520
}
1621
// Do something in addition to invoking hrtime

integration-tests/profiler/profiler.spec.js

Lines changed: 27 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -286,6 +286,19 @@ describe('profiler', () => {
286286
let oomExecArgv
287287
const timeout = 30000
288288

289+
// Target sample count per span for the code hotspots test
290+
const idealSamplesPerSpan = 10
291+
292+
// Intrinsic invariants for the code hotspots test
293+
const expectedSpans = 9 // codehotspots.js creates 3x3 spans
294+
const profilerSamplingFrequency = 99 // Hz
295+
296+
// Computed values for the code hotspots test. busyCycleTimeNs is adaptively adjusted by the test
297+
// when it needs to be repeated.
298+
const idealSampleCount = idealSamplesPerSpan * expectedSpans // we'd like 10 samples per span, ideally
299+
let busyCycleTimeNs = 1000000000 * idealSamplesPerSpan / profilerSamplingFrequency
300+
const maxBusyCycleTimeNs = (timeout - 1000) * 1000000 / expectedSpans
301+
289302
before(async () => {
290303
sandbox = await createSandbox()
291304
cwd = sandbox.folder
@@ -300,13 +313,17 @@ describe('profiler', () => {
300313
})
301314

302315
if (process.platform !== 'win32') {
303-
it('code hotspots and endpoint tracing works', async () => {
316+
it('code hotspots and endpoint tracing works', async function () {
317+
// See the comment on the busyCycleTimeNs recomputation below. Ideally a single retry should be enough
318+
// with recomputed busyCycleTimeNs, but let's give ourselves more leeway.
319+
this.retries(9)
304320
const procStart = BigInt(Date.now() * 1000000)
305321
const proc = fork(path.join(cwd, 'profiler/codehotspots.js'), {
306322
cwd,
307323
env: {
308324
DD_PROFILING_EXPORTERS: 'file',
309-
DD_PROFILING_ENABLED: 1
325+
DD_PROFILING_ENABLED: 1,
326+
// Pass whole nanoseconds. Use Math.trunc rather than `| 0`: bitwise OR coerces to a signed
// 32-bit integer, and the adaptive busy cycle time can exceed 2^31 - 1 ns (about 2.147 s),
// which would wrap to a negative value and make the child's busy loop exit immediately.
BUSY_CYCLE_TIME: Math.trunc(busyCycleTimeNs).toString()
310327
}
311328
})
312329

@@ -319,6 +336,14 @@ describe('profiler', () => {
319336

320337
const { profile, encoded } = await getLatestProfile(cwd, /^wall_.+\.pprof$/)
321338

339+
// Recompute in case we need to retry. It is possible that some of the assertions in the test
340+
// will fail because we gathered too few samples. This can happen if the machine
341+
// is CPU-constrained so the V8 thread that triggers PROF signals gets CPU starved. If we need
342+
// to retry, the busyCycleTime will be prolonged to have the next execution of codehotspots.js
343+
// run for long enough in the current environment for the profiler to capture the ideal number
344+
// of samples.
345+
busyCycleTimeNs = Math.min(maxBusyCycleTimeNs, busyCycleTimeNs * idealSampleCount / profile.sample.length)
346+
322347
// We check the profile for the following invariants:
323348
// - every sample needs to have an 'end_timestamp_ns' label that has values (nanos since UNIX
324349
// epoch) between process start and end.

0 commit comments

Comments
 (0)