diff --git a/.changeset/description-and-resolution.md b/.changeset/description-and-resolution.md new file mode 100644 index 0000000..1643ba3 --- /dev/null +++ b/.changeset/description-and-resolution.md @@ -0,0 +1,10 @@ +--- +"@iqai/alert-logger": minor +--- + +feat: add `description` option and fix resolution noise + +- Add `description` field to `AlertOptions` for separating short titles from detailed messages. When set, `description` is used as the embed body instead of the title. +- Allow `error()` and `critical()` to accept `(title, options)` without an intermediate `undefined` error param. +- Resolution notifications now only fire for sustained incidents (count > rampThreshold). One-off or sporadic alerts no longer produce "Resolved" messages. +- NestJS exception filter uses `{METHOD} {PATH}` as the alert title instead of the full error message. diff --git a/src/core/aggregator.test.ts b/src/core/aggregator.test.ts index 2add9b8..4ae07cd 100644 --- a/src/core/aggregator.test.ts +++ b/src/core/aggregator.test.ts @@ -170,8 +170,25 @@ describe('Aggregator', () => { }) describe('resolution detection', () => { - it('detects entries past the cooldown as resolved', () => { - aggregator.process('err-resolve') + function processNTimes(fp: string, n: number) { + for (let i = 0; i < n; i++) { + aggregator.process(fp) + } + } + + it('does not resolve alerts below rampThreshold', () => { + // Process a few times (below default rampThreshold of 64) + processNTimes('err-few', 10) + + vi.advanceTimersByTime(DEFAULT_AGGREGATION.resolutionCooldownMs + 1) + + const resolved = aggregator.checkResolutions() + expect(resolved).toHaveLength(0) + }) + + it('resolves alerts that exceeded rampThreshold (sustained crisis)', () => { + // Process more than rampThreshold (default 64) + processNTimes('err-resolve', 65) // Advance past resolution cooldown (default 2 minutes) vi.advanceTimersByTime(DEFAULT_AGGREGATION.resolutionCooldownMs + 1) @@ -179,7 +196,7 @@ describe('Aggregator', () => { const resolved = aggregator.checkResolutions() expect(resolved).toHaveLength(1) expect(resolved[0].fingerprint).toBe('err-resolve') - expect(resolved[0].count).toBe(1) + expect(resolved[0].count).toBe(65) }) it('does not resolve entries that are still active', () => { @@ -193,7 +210,7 @@ describe('Aggregator', () => { }) it('does not re-resolve already resolved entries', () => { - aggregator.process('err-once') + processNTimes('err-re-resolve', 65) vi.advanceTimersByTime(DEFAULT_AGGREGATION.resolutionCooldownMs + 1) @@ -207,10 +224,10 @@ describe('Aggregator', () => { it('includes peakRate and timing info in resolved entries', () => { const startTime = Date.now() - aggregator.process('err-info') + processNTimes('err-info', 64) vi.advanceTimersByTime(1000) - aggregator.process('err-info') + aggregator.process('err-info') // count=65, exceeds rampThreshold vi.advanceTimersByTime(DEFAULT_AGGREGATION.resolutionCooldownMs + 1) @@ -222,8 +239,14 @@ describe('Aggregator', () => { }) describe('state cleanup', () => { + function processNTimes(fp: string, n: number) { + for (let i = 0; i < n; i++) { + aggregator.process(fp) + } + } + it('evicts entries after resolution + grace period (5 minutes)', () => { - aggregator.process('err-evict') + processNTimes('err-evict', 65) // Advance past cooldown to trigger resolution vi.advanceTimersByTime(DEFAULT_AGGREGATION.resolutionCooldownMs + 1) @@ -239,8 +262,25 @@ describe('Aggregator', () => { expect(result.count).toBe(1) }) + it('evicts low-count entries after cooldown + grace period', () => { + aggregator.process('err-low-evict') + + // Advance past cooldown — marks as resolution (but no resolved notification) + vi.advanceTimersByTime(DEFAULT_AGGREGATION.resolutionCooldownMs + 1) + aggregator.checkResolutions() + + // Advance past grace period + vi.advanceTimersByTime(5 * 60_000 + 1) + aggregator.checkResolutions() + + // Should create a fresh onset + const result = aggregator.process('err-low-evict') + expect(result.phase).toBe('onset') + expect(result.count).toBe(1) + }) + it('does not evict entries before grace period expires', () => { - aggregator.process('err-keep') + processNTimes('err-keep', 65) vi.advanceTimersByTime(DEFAULT_AGGREGATION.resolutionCooldownMs + 1) aggregator.checkResolutions() @@ -250,17 +290,15 @@ describe('Aggregator', () => { aggregator.checkResolutions() // Processing should NOT create a fresh onset (state still exists in resolution) - // The entry is still in the map with phase 'resolution', but process() will - // increment the existing state const result = aggregator.process('err-keep') - expect(result.count).toBe(2) + expect(result.count).toBe(66) }) }) describe('startResolutionTimer', () => { - it('calls onResolved callback for resolved entries', () => { + it('calls onResolved callback for sustained alerts', () => { const onResolved = vi.fn() - aggregator.process('err-timer') + for (let i = 0; i < 65; i++) aggregator.process('err-timer') aggregator.startResolutionTimer(onResolved) // Advance past cooldown + the 30s check interval @@ -269,6 +307,19 @@ describe('Aggregator', () => { expect(onResolved).toHaveBeenCalledWith(expect.objectContaining({ fingerprint: 'err-timer' })) }) + it('does not call onResolved for low-count entries', () => { + const onResolved = vi.fn() + aggregator.process('err-low') + aggregator.process('err-low') + aggregator.process('err-low') + aggregator.startResolutionTimer(onResolved) + + // Advance past cooldown + the 30s check interval + vi.advanceTimersByTime(DEFAULT_AGGREGATION.resolutionCooldownMs + 30_000 + 1) + + expect(onResolved).not.toHaveBeenCalled() + }) + it('does not start multiple timers', () => { const onResolved1 = vi.fn() const onResolved2 = vi.fn() @@ -276,7 +327,7 @@ describe('Aggregator', () => { aggregator.startResolutionTimer(onResolved1) aggregator.startResolutionTimer(onResolved2) - aggregator.process('err-multi') + for (let i = 0; i < 65; i++) aggregator.process('err-multi') vi.advanceTimersByTime(DEFAULT_AGGREGATION.resolutionCooldownMs + 30_000 + 1) diff --git a/src/core/aggregator.ts b/src/core/aggregator.ts index 094a240..ebfc8b7 100644 --- a/src/core/aggregator.ts +++ b/src/core/aggregator.ts @@ -144,14 +144,20 @@ export class Aggregator { } if (now - state.lastSeen >= this.config.resolutionCooldownMs) { + // Only send resolution for alerts that reached the sustained phase + // (count > rampThreshold). A few sporadic failures aren't a "crisis" + // worth announcing as resolved — resolution is for ongoing incidents + // that generated a flood of alerts and then stopped. + if (state.count > this.config.rampThreshold) { + resolved.push({ + fingerprint, + count: state.count, + firstSeen: state.firstSeen, + lastSeen: state.lastSeen, + peakRate: state.peakRate, + }) + } state.phase = 'resolution' - resolved.push({ - fingerprint, - count: state.count, - firstSeen: state.firstSeen, - lastSeen: state.lastSeen, - peakRate: state.peakRate, - }) } }) diff --git a/src/core/alert-logger.test.ts b/src/core/alert-logger.test.ts index a717de4..7231328 100644 --- a/src/core/alert-logger.test.ts +++ b/src/core/alert-logger.test.ts @@ -139,6 +139,72 @@ describe('AlertLogger integration', () => { }) }) + describe('description option', () => { + it('uses description as message when provided on error()', async () => { + const adapter = new MockAdapter() + const logger = AlertLogger.init({ + adapters: [adapter], + serviceName: 'test-service', + environment: 'production', + }) + + logger.error('Market Resolution Failed', { + description: 'Market abc-123 resolution failed, state reset to NONE', + }) + + await vi.waitFor(() => expect(adapter.sent.length).toBe(1)) + expect(adapter.sent[0].title).toBe('Market Resolution Failed') + expect(adapter.sent[0].message).toBe('Market abc-123 resolution failed, state reset to NONE') + }) + + it('uses description as message when provided on warn()', async () => { + const adapter = new MockAdapter() + const logger = AlertLogger.init({ + adapters: [adapter], + serviceName: 'test-service', + environment: 'production', + }) + + logger.warn('Indexer Health Changed', { + description: 'Status: healthy → unhealthy', + }) + + await vi.waitFor(() => expect(adapter.sent.length).toBe(1)) + expect(adapter.sent[0].title).toBe('Indexer Health Changed') + expect(adapter.sent[0].message).toBe('Status: healthy → unhealthy') + }) + + it('falls back to title when no description or error', async () => { + const adapter = new MockAdapter() + const logger = AlertLogger.init({ + adapters: [adapter], + serviceName: 'test-service', + environment: 'production', + }) + + logger.error('Something broke') + + await vi.waitFor(() => expect(adapter.sent.length).toBe(1)) + expect(adapter.sent[0].message).toBe('Something broke') + }) + + it('description takes priority over error.message', async () => { + const adapter = new MockAdapter() + const logger = AlertLogger.init({ + adapters: [adapter], + serviceName: 'test-service', + environment: 'production', + }) + + logger.error('DB Failed', new Error('ECONNREFUSED'), { + description: 'Custom description here', + }) + + await vi.waitFor(() => expect(adapter.sent.length).toBe(1)) + expect(adapter.sent[0].message).toBe('Custom description here') + }) + }) + describe('singleton', () => { it('init() sets instance and getInstance() retrieves it', () => { const adapter = new MockAdapter() diff --git a/src/core/alert-logger.ts b/src/core/alert-logger.ts index 966813b..98ba165 100644 --- a/src/core/alert-logger.ts +++ b/src/core/alert-logger.ts @@ -76,22 +76,24 @@ export class AlertLogger { } info(title: string, options?: AlertOptions): void { - this.log('info', title, title, undefined, options) + const message = options?.description ?? title + this.log('info', title, message, undefined, options) } warn(title: string, options?: AlertOptions): void { - this.log('warning', title, title, undefined, options) + const message = options?.description ?? title + this.log('warning', title, message, undefined, options) } - error(title: string, error?: Error | string, options?: AlertOptions): void { + error(title: string, error?: Error | string | AlertOptions, options?: AlertOptions): void { const [err, opts] = this.normalizeErrorArgs(error, options) - const message = err?.message ?? title + const message = opts?.description ?? err?.message ?? title this.log('critical', title, message, err, opts) } - critical(title: string, error?: Error | string, options?: AlertOptions): void { + critical(title: string, error?: Error | string | AlertOptions, options?: AlertOptions): void { const [err, opts] = this.normalizeErrorArgs(error, options) - const message = err?.message ?? title + const message = opts?.description ?? err?.message ?? title this.log('critical', title, message, err, opts) } @@ -227,9 +229,13 @@ export class AlertLogger { } private normalizeErrorArgs( - error?: Error | string, + error?: Error | string | AlertOptions, options?: AlertOptions, ): [Error | undefined, AlertOptions | undefined] { + // Allow error("title", { description: ... }) without an intermediate undefined + if (error != null && typeof error === 'object' && !(error instanceof Error)) { + return [undefined, error] + } if (typeof error === 'string') { return [new Error(error), options] } diff --git a/src/core/types.ts b/src/core/types.ts index a3cc979..a0df041 100644 --- a/src/core/types.ts +++ b/src/core/types.ts @@ -3,6 +3,8 @@ export type AlertLevel = 'info' | 'warning' | 'critical' export type AggregationPhase = 'onset' | 'ramp' | 'sustained' | 'resolution' export interface AlertOptions { + /** Detailed message shown in the embed body/description. When omitted, the title is used. */ + description?: string fields?: Record tags?: string[] dedupKey?: string diff --git a/src/integrations/nestjs/alert-logger.service.ts b/src/integrations/nestjs/alert-logger.service.ts index e14bf23..453184e 100644 --- a/src/integrations/nestjs/alert-logger.service.ts +++ b/src/integrations/nestjs/alert-logger.service.ts @@ -16,12 +16,21 @@ export class AlertLoggerService { this.logger.warn(title, this.mergeContext(options)) } - error(title: string, error?: Error | string, options?: AlertOptions): void { - this.logger.error(title, error, this.mergeContext(options)) + error(title: string, error?: Error | string | AlertOptions, options?: AlertOptions): void { + // When called as error("title", { ... }), pass through directly + if (error != null && typeof error === 'object' && !(error instanceof Error)) { + this.logger.error(title, this.mergeContext(error)) + } else { + this.logger.error(title, error, this.mergeContext(options)) + } } - critical(title: string, error?: Error | string, options?: AlertOptions): void { - this.logger.critical(title, error, this.mergeContext(options)) + critical(title: string, error?: Error | string | AlertOptions, options?: AlertOptions): void { + if (error != null && typeof error === 'object' && !(error instanceof Error)) { + this.logger.critical(title, this.mergeContext(error)) + } else { + this.logger.critical(title, error, this.mergeContext(options)) + } } private mergeContext(options?: AlertOptions): AlertOptions { diff --git a/src/integrations/nestjs/exception.filter.ts b/src/integrations/nestjs/exception.filter.ts index 5ebe545..ce89ef5 100644 --- a/src/integrations/nestjs/exception.filter.ts +++ b/src/integrations/nestjs/exception.filter.ts @@ -24,18 +24,20 @@ export class AlertExceptionFilter extends BaseExceptionFilter { const path = request?.url ?? 'UNKNOWN' const ip = request?.ip ?? 'UNKNOWN' + const title = `${method} ${path}` + if (exception instanceof HttpException) { const statusCode = exception.getStatus() if (statusCode >= 500) { - this.alert.error(exception.message, exception, { + this.alert.error(title, exception, { fields: { method, path, statusCode, ip }, }) } } else { const error = exception instanceof Error ? exception : new Error(String(exception)) - this.alert.critical(error.message, error, { + this.alert.critical(title, error, { fields: { method, path, statusCode: 500, ip }, }) }