diff --git a/.changeset/friendly-agents-wait.md b/.changeset/friendly-agents-wait.md new file mode 100644 index 000000000..cd6be9067 --- /dev/null +++ b/.changeset/friendly-agents-wait.md @@ -0,0 +1,5 @@ +--- +"@voltagent/core": patch +--- + +Honor provider Retry-After headers when retrying failed model calls. diff --git a/packages/core/src/agent/agent.spec.ts b/packages/core/src/agent/agent.spec.ts index ca7bf4ac8..152c8c039 100644 --- a/packages/core/src/agent/agent.spec.ts +++ b/packages/core/src/agent/agent.spec.ts @@ -3689,6 +3689,95 @@ Use pandas and summarize findings.`.split("\n"), } }); + it("should honor Retry-After when retrying provider rate limits", async () => { + vi.useFakeTimers(); + const setTimeoutSpy = vi.spyOn(globalThis, "setTimeout"); + + let resolveRetry!: () => void; + const retrySeen = new Promise<void>((resolve) => { + resolveRetry = resolve; + }); + const onRetry = vi.fn(() => { + resolveRetry(); + }); + const agent = new Agent({ + name: "RetryAfterAgent", + instructions: "Test", + model: mockModel as any, + maxRetries: 1, + hooks: createHooks({ onRetry }), + }); + + const mockResponse = { + text: "Retry response", + content: [{ type: "text", text: "Retry response" }], + reasoning: [], + files: [], + sources: [], + toolCalls: [], + toolResults: [], + finishReason: "stop", + usage: { + inputTokens: 10, + outputTokens: 5, + totalTokens: 15, + }, + warnings: [], + request: {}, + response: { + id: "retry-response", + modelId: "test-model", + timestamp: new Date(), + messages: [], + }, + steps: [], + }; + + let callCount = 0; + vi.mocked(ai.generateText).mockImplementation(async () => { + callCount += 1; + if (callCount === 1) { + const error = new Error("Rate limited"); + (error as any).isRetryable = true; + (error as any).statusCode = 429; + (error as any).headers = new Headers({ "retry-after": "3" }); + throw error; + } + return mockResponse as any; + }); + + const resultPromise = agent.generateText("Test"); + + try { + await retrySeen; + await 
Promise.resolve(); + + expect(onRetry).toHaveBeenCalledTimes(1); + expect(vi.mocked(ai.generateText)).toHaveBeenCalledTimes(1); + expect(setTimeoutSpy).toHaveBeenCalledWith(expect.any(Function), 3000); + + await vi.advanceTimersByTimeAsync(3000); + await expect(resultPromise).resolves.toMatchObject({ text: "Retry response" }); + expect(vi.mocked(ai.generateText)).toHaveBeenCalledTimes(2); + } finally { + setTimeoutSpy.mockRestore(); + vi.useRealTimers(); + } + }); + + it("should clamp oversized Retry-After values to Node's max timer delay", async () => { + const agent = new Agent({ + name: "RetryAfterClampAgent", + instructions: "Test", + model: mockModel as any, + }); + + const error = new Error("Rate limited"); + (error as any).headers = new Headers({ "retry-after": "9999999999" }); + + expect((agent as any).getRetryAfterDelayMs(error)).toBe(2_147_483_647); + }); + it("should handle model errors gracefully", async () => { const agent = new Agent({ name: "TestAgent", diff --git a/packages/core/src/agent/agent.ts b/packages/core/src/agent/agent.ts index 6c1a1a25b..32ac91afa 100644 --- a/packages/core/src/agent/agent.ts +++ b/packages/core/src/agent/agent.ts @@ -227,6 +227,7 @@ const DEFAULT_CONVERSATION_TITLE_MAX_OUTPUT_TOKENS = 32; const DEFAULT_CONVERSATION_TITLE_MAX_CHARS = 80; const CONVERSATION_TITLE_INPUT_MAX_CHARS = 2000; const DEFAULT_TOOL_SEARCH_TOP_K = 1; +const MAX_NODE_TIMER_MS = 2_147_483_647; type ResolvedConversationPersistenceOptions = { mode: AgentConversationPersistenceMode; @@ -5738,6 +5739,41 @@ export class Agent { return true; } + /** + * Converts the Retry-After hint attached to a failed model call into a delay in milliseconds. + * + * Reads "retry-after" from error.headers, which may be a Headers instance or a plain record. + * A digits-only value is treated as delay-seconds; the check is strict so that "3abc" or a + * timestamp with a leading year is not misread as a second count, and "0" yields 0 directly. + * Any other value goes through Date.parse and is used only when it gives a finite timestamp. + * + * @param error - The value thrown by the failed model call. + * @returns The delay capped at MAX_NODE_TIMER_MS, or undefined when no usable hint is found. + */ + private getRetryAfterDelayMs(error: unknown): number | undefined { + const headers = (error as { headers?: Headers | Record<string, string> } | undefined)?.headers; + const retryAfter = + headers instanceof Headers + ? headers.get("retry-after") + : (headers?.["retry-after"] ?? 
headers?.["Retry-After"]); + + if (!retryAfter) { + return undefined; + } + + const isDelaySeconds = /^\s*\d+\s*$/.test(retryAfter); + if (isDelaySeconds) { + return Math.min(Number(retryAfter) * 1000, MAX_NODE_TIMER_MS); + } + + const retryAt = Date.parse(retryAfter); + if (Number.isFinite(retryAt)) { + return Math.min(Math.max(retryAt - Date.now(), 0), MAX_NODE_TIMER_MS); + } + + return undefined; + } + private async executeWithModelFallback({ oc, operation, @@ -5885,7 +5921,10 @@ export class Agent { const canRetry = retryEligible && !isLastAttempt; if (canRetry) { - const retryDelayMs = Math.min(1000 * 2 ** attemptIndex, 10000); + const retryDelayMs = Math.min( + this.getRetryAfterDelayMs(error) ?? Math.min(1000 * 2 ** attemptIndex, 10000), + MAX_NODE_TIMER_MS, + ); logger.debug(`[Agent:${this.name}] - Model attempt failed, retrying`, { operation, modelName,