-
-
Notifications
You must be signed in to change notification settings - Fork 1.8k
feat(core): Add weight-based flushing to span buffer #19579
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,4 +1,6 @@ | ||
| import type { DurationUnit, FractionUnit, InformationUnit } from './types-hoist/measurement'; | ||
| import type { Primitive } from './types-hoist/misc'; | ||
| import { isPrimitive } from './utils/is'; | ||
|
|
||
| export type RawAttributes<T> = T & ValidatedAttributes<T>; | ||
| // eslint-disable-next-line @typescript-eslint/no-explicit-any | ||
|
|
@@ -127,6 +129,46 @@ export function serializeAttributes<T>( | |
| return serializedAttributes; | ||
| } | ||
|
|
||
| /** | ||
| * Estimates the serialized byte size of {@link Attributes}, | ||
| * with a couple of heuristics for performance. | ||
| */ | ||
| export function estimateTypedAttributesSizeInBytes(attributes: Attributes | undefined): number { | ||
| if (!attributes) { | ||
| return 0; | ||
| } | ||
| let weight = 0; | ||
| for (const [key, attr] of Object.entries(attributes)) { | ||
| weight += key.length * 2; | ||
| weight += attr.type.length * 2; | ||
| weight += (attr.unit?.length ?? 0) * 2; | ||
| const val = attr.value; | ||
|
|
||
| if (Array.isArray(val)) { | ||
| // Assumption: Individual array items have the same type and roughly the same size | ||
| // probably not always true but allows us to cut down on runtime | ||
| weight += estimatePrimitiveSizeInBytes(val[0]) * val.length; | ||
| } else if (isPrimitive(val)) { | ||
| weight += estimatePrimitiveSizeInBytes(val); | ||
| } else { | ||
| // default fallback for anything else (objects) | ||
| weight += 100; | ||
| } | ||
| } | ||
| return weight; | ||
| } | ||
|
|
||
| function estimatePrimitiveSizeInBytes(value: Primitive): number { | ||
| if (typeof value === 'string') { | ||
| return value.length * 2; | ||
| } else if (typeof value === 'boolean') { | ||
| return 4; | ||
| } else if (typeof value === 'number') { | ||
| return 8; | ||
| } | ||
| return 0; | ||
| } | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Duplicate
|
||
|
|
||
| /** | ||
| * NOTE: We intentionally do not return anything for non-primitive values: | ||
| * - array support will come in the future but if we stringify arrays now, | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,37 @@ | ||
| import { estimateTypedAttributesSizeInBytes } from '../../attributes'; | ||
| import type { SerializedStreamedSpan } from '../../types-hoist/span'; | ||
|
|
||
| /** | ||
| * Estimates the serialized byte size of a {@link SerializedStreamedSpan}. | ||
| * | ||
| * Uses 2 bytes per character as a UTF-16 approximation, and 8 bytes per number. | ||
| * The estimate is intentionally conservative and may be slightly lower than the | ||
| * actual byte size on the wire. | ||
| * We compensate for this by setting the span buffers internal limit well below the limit | ||
| * of how large an actual span v2 envelope may be. | ||
| */ | ||
| export function estimateSerializedSpanSizeInBytes(span: SerializedStreamedSpan): number { | ||
| /* | ||
| * Fixed-size fields are pre-computed as a constant for performance: | ||
| * - two timestamps (8 bytes each = 16) | ||
| * - is_segment boolean (5 bytes, assumed false for most spans) | ||
| * - trace_id – always 32 hex chars (64 bytes) | ||
| * - span_id – always 16 hex chars (32 bytes) | ||
| * - parent_span_id – 16 hex chars, assumed present for most spans (32 bytes) | ||
| * - status "ok" – most common value (8 bytes) | ||
| * = 156 bytes total base | ||
| */ | ||
| let weight = 156; | ||
| weight += span.name.length * 2; | ||
| weight += estimateTypedAttributesSizeInBytes(span.attributes); | ||
| if (span.links && span.links.length > 0) { | ||
| // Assumption: Links are roughly equal in number of attributes | ||
| // probably not always true but allows us to cut down on runtime | ||
| const firstLink = span.links[0]; | ||
| const attributes = firstLink?.attributes; | ||
| // Fixed size 100 due to span_id, trace_id and sampled flag (see above) | ||
| const linkWeight = 100 + (attributes ? estimateTypedAttributesSizeInBytes(attributes) : 0); | ||
| weight += linkWeight * span.links.length; | ||
| } | ||
| return weight; | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -6,13 +6,16 @@ import { safeUnref } from '../../utils/timer'; | |
| import { getDynamicSamplingContextFromSpan } from '../dynamicSamplingContext'; | ||
| import type { SerializedStreamedSpanWithSegmentSpan } from './captureSpan'; | ||
| import { createStreamedSpanEnvelope } from './envelope'; | ||
| import { estimateSerializedSpanSizeInBytes } from './estimateSize'; | ||
|
|
||
| /** | ||
| * We must not send more than 1000 spans in one envelope. | ||
| * Otherwise the envelope is dropped by Relay. | ||
| */ | ||
| const MAX_SPANS_PER_ENVELOPE = 1000; | ||
|
|
||
| const MAX_TRACE_WEIGHT_IN_BYTES = 5_000_000; | ||
|
|
||
| export interface SpanBufferOptions { | ||
| /** | ||
| * Max spans per trace before auto-flush | ||
|
|
@@ -29,6 +32,14 @@ export interface SpanBufferOptions { | |
| * @default 5_000 | ||
| */ | ||
| flushInterval?: number; | ||
|
|
||
| /** | ||
| * Max accumulated byte weight of spans per trace before auto-flush. | ||
| * Size is estimated, not exact. Uses 2 bytes per character for strings (UTF-16). | ||
| * | ||
| * @default 5_000_000 (5 MB) | ||
| */ | ||
| maxTraceWeightInBytes?: number; | ||
| } | ||
|
|
||
| /** | ||
|
|
@@ -45,23 +56,28 @@ export interface SpanBufferOptions { | |
| export class SpanBuffer { | ||
| /* Bucket spans by their trace id */ | ||
| private _traceMap: Map<string, Set<SerializedStreamedSpanWithSegmentSpan>>; | ||
| private _traceWeightMap: Map<string, number>; | ||
|
|
||
| private _flushIntervalId: ReturnType<typeof setInterval> | null; | ||
| private _client: Client; | ||
| private _maxSpanLimit: number; | ||
| private _flushInterval: number; | ||
| private _maxTraceWeight: number; | ||
|
|
||
| public constructor(client: Client, options?: SpanBufferOptions) { | ||
| this._traceMap = new Map(); | ||
| this._traceWeightMap = new Map(); | ||
| this._client = client; | ||
|
|
||
| const { maxSpanLimit, flushInterval } = options ?? {}; | ||
| const { maxSpanLimit, flushInterval, maxTraceWeightInBytes } = options ?? {}; | ||
|
|
||
| this._maxSpanLimit = | ||
| maxSpanLimit && maxSpanLimit > 0 && maxSpanLimit <= MAX_SPANS_PER_ENVELOPE | ||
| ? maxSpanLimit | ||
| : MAX_SPANS_PER_ENVELOPE; | ||
| this._flushInterval = flushInterval && flushInterval > 0 ? flushInterval : 5_000; | ||
| this._maxTraceWeight = | ||
| maxTraceWeightInBytes && maxTraceWeightInBytes > 0 ? maxTraceWeightInBytes : MAX_TRACE_WEIGHT_IN_BYTES; | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Missing integration or E2E test for new featureLow Severity This Additional Locations (1)Triggered by project rule: PR Review Guidelines for Cursor Bot There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Bugbot Autofix determined this is a false positive. Comprehensive unit tests exist; integration tests require span streaming infrastructure that doesn't exist in any test suite yet. This Bugbot Autofix run was free. To enable autofix for future PRs, go to the Cursor dashboard.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Correct, integration tests will follow soon! |
||
|
|
||
| this._flushIntervalId = null; | ||
| this._debounceFlushInterval(); | ||
|
|
@@ -77,6 +93,7 @@ export class SpanBuffer { | |
| clearInterval(this._flushIntervalId); | ||
| } | ||
| this._traceMap.clear(); | ||
| this._traceWeightMap.clear(); | ||
| }); | ||
| } | ||
|
|
||
|
|
@@ -93,7 +110,10 @@ export class SpanBuffer { | |
| this._traceMap.set(traceId, traceBucket); | ||
| } | ||
|
|
||
| if (traceBucket.size >= this._maxSpanLimit) { | ||
| const newWeight = (this._traceWeightMap.get(traceId) ?? 0) + estimateSerializedSpanSizeInBytes(spanJSON); | ||
| this._traceWeightMap.set(traceId, newWeight); | ||
|
|
||
| if (traceBucket.size >= this._maxSpanLimit || newWeight >= this._maxTraceWeight) { | ||
| this.flush(traceId); | ||
| this._debounceFlushInterval(); | ||
| } | ||
|
|
@@ -128,7 +148,7 @@ export class SpanBuffer { | |
| if (!traceBucket.size) { | ||
| // we should never get here, given we always add a span when we create a new bucket | ||
| // and delete the bucket once we flush out the trace | ||
| this._traceMap.delete(traceId); | ||
| this._removeTrace(traceId); | ||
| return; | ||
| } | ||
|
|
||
|
|
@@ -137,7 +157,7 @@ export class SpanBuffer { | |
| const segmentSpan = spans[0]?._segmentSpan; | ||
| if (!segmentSpan) { | ||
| DEBUG_BUILD && debug.warn('No segment span reference found on span JSON, cannot compute DSC'); | ||
| this._traceMap.delete(traceId); | ||
| this._removeTrace(traceId); | ||
| return; | ||
| } | ||
|
|
||
|
|
@@ -157,7 +177,12 @@ export class SpanBuffer { | |
| DEBUG_BUILD && debug.error('Error while sending streamed span envelope:', reason); | ||
| }); | ||
|
|
||
| this._removeTrace(traceId); | ||
| } | ||
|
|
||
| private _removeTrace(traceId: string): void { | ||
| this._traceMap.delete(traceId); | ||
| this._traceWeightMap.delete(traceId); | ||
| } | ||
|
|
||
| private _debounceFlushInterval(): void { | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,177 @@ | ||
| import { describe, expect, it } from 'vitest'; | ||
| import { estimateSerializedSpanSizeInBytes } from '../../../../src/tracing/spans/estimateSize'; | ||
| import type { SerializedStreamedSpan } from '../../../../src/types-hoist/span'; | ||
|
|
||
| // Produces a realistic trace_id (32 hex chars) and span_id (16 hex chars) | ||
| const TRACE_ID = 'a1b2c3d4e5f607189a0b1c2d3e4f5060'; | ||
| const SPAN_ID = 'a1b2c3d4e5f60718'; | ||
|
|
||
| describe('estimateSerializedSpanSizeInBytes', () => { | ||
| it('estimates a minimal span (no attributes, no links, no parent) within a reasonable range of JSON.stringify', () => { | ||
| const span: SerializedStreamedSpan = { | ||
| trace_id: TRACE_ID, | ||
| span_id: SPAN_ID, | ||
| name: 'GET /api/users', | ||
| start_timestamp: 1740000000.123, | ||
| end_timestamp: 1740000001.456, | ||
| status: 'ok', | ||
| is_segment: true, | ||
| }; | ||
|
|
||
| const estimate = estimateSerializedSpanSizeInBytes(span); | ||
| const actual = JSON.stringify(span).length; | ||
|
|
||
| expect(estimate).toBe(184); | ||
| expect(actual).toBe(196); | ||
|
|
||
| expect(estimate).toBeLessThanOrEqual(actual * 1.2); | ||
| expect(estimate).toBeGreaterThanOrEqual(actual * 0.8); | ||
| }); | ||
|
|
||
| it('estimates a span with a parent_span_id within a reasonable range', () => { | ||
| const span: SerializedStreamedSpan = { | ||
| trace_id: TRACE_ID, | ||
| span_id: SPAN_ID, | ||
| parent_span_id: 'b2c3d4e5f6071890', | ||
| name: 'db.query', | ||
| start_timestamp: 1740000000.0, | ||
| end_timestamp: 1740000000.05, | ||
| status: 'ok', | ||
| is_segment: false, | ||
| }; | ||
|
|
||
| const estimate = estimateSerializedSpanSizeInBytes(span); | ||
| const actual = JSON.stringify(span).length; | ||
|
|
||
| expect(estimate).toBe(172); | ||
| expect(actual).toBe(222); | ||
|
|
||
| expect(estimate).toBeLessThanOrEqual(actual * 1.1); | ||
| expect(estimate).toBeGreaterThanOrEqual(actual * 0.7); | ||
| }); | ||
|
|
||
| it('estimates a span with string attributes within a reasonable range', () => { | ||
| const span: SerializedStreamedSpan = { | ||
| trace_id: TRACE_ID, | ||
| span_id: SPAN_ID, | ||
| name: 'GET /api/users', | ||
| start_timestamp: 1740000000.0, | ||
| end_timestamp: 1740000000.1, | ||
| status: 'ok', | ||
| is_segment: false, | ||
| attributes: { | ||
| 'http.method': { type: 'string', value: 'GET' }, | ||
| 'http.url': { type: 'string', value: 'https://example.com/api/users?page=1&limit=100' }, | ||
| 'http.status_code': { type: 'integer', value: 200 }, | ||
| 'db.statement': { type: 'string', value: 'SELECT * FROM users WHERE id = $1' }, | ||
| 'sentry.origin': { type: 'string', value: 'auto.http.fetch' }, | ||
| }, | ||
| }; | ||
|
|
||
| const estimate = estimateSerializedSpanSizeInBytes(span); | ||
| const actual = JSON.stringify(span).length; | ||
|
|
||
| expect(estimate).toBeLessThanOrEqual(actual * 1.2); | ||
| expect(estimate).toBeGreaterThanOrEqual(actual * 0.8); | ||
| }); | ||
|
|
||
| it('estimates a span with numeric attributes within a reasonable range', () => { | ||
| const span: SerializedStreamedSpan = { | ||
| trace_id: TRACE_ID, | ||
| span_id: SPAN_ID, | ||
| name: 'process.task', | ||
| start_timestamp: 1740000000.0, | ||
| end_timestamp: 1740000005.0, | ||
| status: 'ok', | ||
| is_segment: false, | ||
| attributes: { | ||
| 'items.count': { type: 'integer', value: 42 }, | ||
| 'duration.ms': { type: 'double', value: 5000.5 }, | ||
| 'retry.count': { type: 'integer', value: 3 }, | ||
| }, | ||
| }; | ||
|
|
||
| const estimate = estimateSerializedSpanSizeInBytes(span); | ||
| const actual = JSON.stringify(span).length; | ||
|
|
||
| expect(estimate).toBeLessThanOrEqual(actual * 1.2); | ||
| expect(estimate).toBeGreaterThanOrEqual(actual * 0.8); | ||
| }); | ||
|
|
||
| it('estimates a span with boolean attributes within a reasonable range', () => { | ||
| const span: SerializedStreamedSpan = { | ||
| trace_id: TRACE_ID, | ||
| span_id: SPAN_ID, | ||
| name: 'cache.get', | ||
| start_timestamp: 1740000000.0, | ||
| end_timestamp: 1740000000.002, | ||
| status: 'ok', | ||
| is_segment: false, | ||
| attributes: { | ||
| 'cache.hit': { type: 'boolean', value: true }, | ||
| 'cache.miss': { type: 'boolean', value: false }, | ||
| }, | ||
| }; | ||
|
|
||
| const estimate = estimateSerializedSpanSizeInBytes(span); | ||
| const actual = JSON.stringify(span).length; | ||
|
|
||
| expect(estimate).toBeLessThanOrEqual(actual * 1.2); | ||
| expect(estimate).toBeGreaterThanOrEqual(actual * 0.8); | ||
| }); | ||
|
|
||
| it('estimates a span with array attributes within a reasonable range', () => { | ||
| const span: SerializedStreamedSpan = { | ||
| trace_id: TRACE_ID, | ||
| span_id: SPAN_ID, | ||
| name: 'batch.process', | ||
| start_timestamp: 1740000000.0, | ||
| end_timestamp: 1740000002.0, | ||
| status: 'ok', | ||
| is_segment: false, | ||
| attributes: { | ||
| 'item.ids': { type: 'string[]', value: ['id-001', 'id-002', 'id-003', 'id-004', 'id-005'] }, | ||
| scores: { type: 'double[]', value: [1.1, 2.2, 3.3, 4.4] }, | ||
| flags: { type: 'boolean[]', value: [true, false, true] }, | ||
| }, | ||
| }; | ||
|
|
||
| const estimate = estimateSerializedSpanSizeInBytes(span); | ||
| const actual = JSON.stringify(span).length; | ||
|
|
||
| expect(estimate).toBeLessThanOrEqual(actual * 1.2); | ||
| expect(estimate).toBeGreaterThanOrEqual(actual * 0.8); | ||
| }); | ||
|
|
||
| it('estimates a span with links within a reasonable range', () => { | ||
| const span: SerializedStreamedSpan = { | ||
| trace_id: TRACE_ID, | ||
| span_id: SPAN_ID, | ||
| name: 'linked.operation', | ||
| start_timestamp: 1740000000.0, | ||
| end_timestamp: 1740000001.0, | ||
| status: 'ok', | ||
| is_segment: true, | ||
| links: [ | ||
| { | ||
| trace_id: 'b2c3d4e5f607189a0b1c2d3e4f506070', | ||
| span_id: 'c3d4e5f607189a0b', | ||
| sampled: true, | ||
| attributes: { | ||
| 'sentry.link.type': { type: 'string', value: 'previous_trace' }, | ||
| }, | ||
| }, | ||
| { | ||
| trace_id: 'c3d4e5f607189a0b1c2d3e4f50607080', | ||
| span_id: 'd4e5f607189a0b1c', | ||
| }, | ||
| ], | ||
| }; | ||
|
|
||
| const estimate = estimateSerializedSpanSizeInBytes(span); | ||
| const actual = JSON.stringify(span).length; | ||
|
|
||
| expect(estimate).toBeLessThanOrEqual(actual * 1.2); | ||
| expect(estimate).toBeGreaterThanOrEqual(actual * 0.8); | ||
| }); | ||
| }); |


There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
As this is for attributes, we probably only need to account for string, boolean and number. But just for the sake of completeness, I want to mention that there would still be
symbolandbigintthat would fall through and evaluate as0(in case this ever gets relevant).There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Good point! At the moment, we'd drop these attributes on spans since they're not supported. We might want to reconsider this eventually (see #18164), in which case we'd probably stringify them. So we'd report the stringified length * 2. I'm gonna leave this as-is for now though.