5 changes: 5 additions & 0 deletions .changeset/whole-swans-cheer.md
@@ -0,0 +1,5 @@
---
"roo-cline": patch
---

Adds a slider to configure the threshold that triggers intelligent context condensing
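
Reviewer note: a minimal sketch of what the new slider controls. `autoCondenseContextPercent` is a percentage of the model's context window; condensing triggers once the conversation's token usage reaches it. The standalone variables and numbers below are illustrative only, not code from this PR:

```ts
// Mirrors the check added in src/core/sliding-window/index.ts below.
const contextWindow = 100_000 // model context window, in tokens
const prevContextTokens = 60_000 // current conversation size, in tokens
const autoCondenseContextPercent = 50 // value chosen on the new slider

// Condense once usage reaches the configured percentage of the window.
const contextPercent = (100 * prevContextTokens) / contextWindow // 60
const shouldCondense = contextPercent >= autoCondenseContextPercent // true
console.log({ contextPercent, shouldCondense })
```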
127 changes: 127 additions & 0 deletions src/core/sliding-window/__tests__/sliding-window.test.ts
@@ -248,6 +248,8 @@ describe("truncateConversationIfNeeded", () => {
contextWindow: modelInfo.contextWindow,
maxTokens: modelInfo.maxTokens,
apiHandler: mockApiHandler,
autoCondenseContext: false,
autoCondenseContextPercent: 100,
systemPrompt: "System prompt",
})

@@ -277,6 +279,8 @@ describe("truncateConversationIfNeeded", () => {
contextWindow: modelInfo.contextWindow,
maxTokens: modelInfo.maxTokens,
apiHandler: mockApiHandler,
autoCondenseContext: false,
autoCondenseContextPercent: 100,
systemPrompt: "System prompt",
})

@@ -304,6 +308,8 @@ describe("truncateConversationIfNeeded", () => {
contextWindow: modelInfo1.contextWindow,
maxTokens: modelInfo1.maxTokens,
apiHandler: mockApiHandler,
autoCondenseContext: false,
autoCondenseContextPercent: 100,
systemPrompt: "System prompt",
})

@@ -313,6 +319,8 @@ describe("truncateConversationIfNeeded", () => {
contextWindow: modelInfo2.contextWindow,
maxTokens: modelInfo2.maxTokens,
apiHandler: mockApiHandler,
autoCondenseContext: false,
autoCondenseContextPercent: 100,
systemPrompt: "System prompt",
})

@@ -329,6 +337,8 @@ describe("truncateConversationIfNeeded", () => {
contextWindow: modelInfo1.contextWindow,
maxTokens: modelInfo1.maxTokens,
apiHandler: mockApiHandler,
autoCondenseContext: false,
autoCondenseContextPercent: 100,
systemPrompt: "System prompt",
})

@@ -338,6 +348,8 @@ describe("truncateConversationIfNeeded", () => {
contextWindow: modelInfo2.contextWindow,
maxTokens: modelInfo2.maxTokens,
apiHandler: mockApiHandler,
autoCondenseContext: false,
autoCondenseContextPercent: 100,
systemPrompt: "System prompt",
})

@@ -369,6 +381,8 @@ describe("truncateConversationIfNeeded", () => {
contextWindow: modelInfo.contextWindow,
maxTokens,
apiHandler: mockApiHandler,
autoCondenseContext: false,
autoCondenseContextPercent: 100,
systemPrompt: "System prompt",
})
expect(resultWithSmall).toEqual({
@@ -399,6 +413,8 @@ describe("truncateConversationIfNeeded", () => {
contextWindow: modelInfo.contextWindow,
maxTokens,
apiHandler: mockApiHandler,
autoCondenseContext: false,
autoCondenseContextPercent: 100,
systemPrompt: "System prompt",
})
expect(resultWithLarge.messages).not.toEqual(messagesWithLargeContent) // Should truncate
@@ -422,6 +438,8 @@ describe("truncateConversationIfNeeded", () => {
contextWindow: modelInfo.contextWindow,
maxTokens,
apiHandler: mockApiHandler,
autoCondenseContext: false,
autoCondenseContextPercent: 100,
systemPrompt: "System prompt",
})
expect(resultWithVeryLarge.messages).not.toEqual(messagesWithVeryLargeContent) // Should truncate
@@ -448,6 +466,8 @@ describe("truncateConversationIfNeeded", () => {
contextWindow: modelInfo.contextWindow,
maxTokens: modelInfo.maxTokens,
apiHandler: mockApiHandler,
autoCondenseContext: false,
autoCondenseContextPercent: 100,
systemPrompt: "System prompt",
})
expect(result).toEqual({
@@ -488,6 +508,7 @@ describe("truncateConversationIfNeeded", () => {
maxTokens: modelInfo.maxTokens,
apiHandler: mockApiHandler,
autoCondenseContext: true,
autoCondenseContextPercent: 100,
systemPrompt: "System prompt",
})

@@ -534,6 +555,7 @@ describe("truncateConversationIfNeeded", () => {
maxTokens: modelInfo.maxTokens,
apiHandler: mockApiHandler,
autoCondenseContext: true,
autoCondenseContextPercent: 100,
systemPrompt: "System prompt",
})

@@ -570,6 +592,7 @@ describe("truncateConversationIfNeeded", () => {
maxTokens: modelInfo.maxTokens,
apiHandler: mockApiHandler,
autoCondenseContext: false,
autoCondenseContextPercent: 50, // This shouldn't matter since autoCondenseContext is false
systemPrompt: "System prompt",
})

@@ -587,6 +610,94 @@ describe("truncateConversationIfNeeded", () => {
// Clean up
summarizeSpy.mockRestore()
})

it("should use summarizeConversation when autoCondenseContext is true and context percent exceeds threshold", async () => {
// Mock the summarizeConversation function
const mockSummary = "This is a summary of the conversation"
const mockCost = 0.05
const mockSummarizeResponse: condenseModule.SummarizeResponse = {
messages: [
{ role: "user", content: "First message" },
{ role: "assistant", content: mockSummary, isSummary: true },
{ role: "user", content: "Last message" },
],
summary: mockSummary,
cost: mockCost,
newContextTokens: 100,
}

const summarizeSpy = jest
.spyOn(condenseModule, "summarizeConversation")
.mockResolvedValue(mockSummarizeResponse)

const modelInfo = createModelInfo(100000, 30000)
// Set tokens to be below the allowedTokens threshold but above the percentage threshold
const contextWindow = modelInfo.contextWindow
const totalTokens = 60000 // Below allowedTokens but 60% of context window
const messagesWithSmallContent = [...messages.slice(0, -1), { ...messages[messages.length - 1], content: "" }]

const result = await truncateConversationIfNeeded({
messages: messagesWithSmallContent,
totalTokens,
contextWindow,
maxTokens: modelInfo.maxTokens,
apiHandler: mockApiHandler,
autoCondenseContext: true,
autoCondenseContextPercent: 50, // Set threshold to 50% - our tokens are at 60%
systemPrompt: "System prompt",
})

// Verify summarizeConversation was called with the right parameters
expect(summarizeSpy).toHaveBeenCalledWith(messagesWithSmallContent, mockApiHandler, "System prompt")

// Verify the result contains the summary information
expect(result).toMatchObject({
messages: mockSummarizeResponse.messages,
summary: mockSummary,
cost: mockCost,
prevContextTokens: totalTokens,
})

// Clean up
summarizeSpy.mockRestore()
})

it("should not use summarizeConversation when autoCondenseContext is true but context percent is below threshold", async () => {
// Reset any previous mock calls
jest.clearAllMocks()
const summarizeSpy = jest.spyOn(condenseModule, "summarizeConversation")

const modelInfo = createModelInfo(100000, 30000)
// Set tokens to be below both the allowedTokens threshold and the percentage threshold
const contextWindow = modelInfo.contextWindow
const totalTokens = 40000 // 40% of context window
const messagesWithSmallContent = [...messages.slice(0, -1), { ...messages[messages.length - 1], content: "" }]

const result = await truncateConversationIfNeeded({
messages: messagesWithSmallContent,
totalTokens,
contextWindow,
maxTokens: modelInfo.maxTokens,
apiHandler: mockApiHandler,
autoCondenseContext: true,
autoCondenseContextPercent: 50, // Set threshold to 50% - our tokens are at 40%
systemPrompt: "System prompt",
})

// Verify summarizeConversation was not called
expect(summarizeSpy).not.toHaveBeenCalled()

// Verify no truncation or summarization occurred
expect(result).toEqual({
messages: messagesWithSmallContent,
summary: "",
cost: 0,
prevContextTokens: totalTokens,
})

// Clean up
summarizeSpy.mockRestore()
})
})

/**
@@ -624,6 +735,8 @@ describe("getMaxTokens", () => {
contextWindow: modelInfo.contextWindow,
maxTokens: modelInfo.maxTokens,
apiHandler: mockApiHandler,
autoCondenseContext: false,
autoCondenseContextPercent: 100,
systemPrompt: "System prompt",
})
expect(result1).toEqual({
@@ -640,6 +753,8 @@ describe("getMaxTokens", () => {
contextWindow: modelInfo.contextWindow,
maxTokens: modelInfo.maxTokens,
apiHandler: mockApiHandler,
autoCondenseContext: false,
autoCondenseContextPercent: 100,
systemPrompt: "System prompt",
})
expect(result2.messages).not.toEqual(messagesWithSmallContent)
@@ -664,6 +779,8 @@ describe("getMaxTokens", () => {
contextWindow: modelInfo.contextWindow,
maxTokens: modelInfo.maxTokens,
apiHandler: mockApiHandler,
autoCondenseContext: false,
autoCondenseContextPercent: 100,
systemPrompt: "System prompt",
})
expect(result1).toEqual({
@@ -680,6 +797,8 @@ describe("getMaxTokens", () => {
contextWindow: modelInfo.contextWindow,
maxTokens: modelInfo.maxTokens,
apiHandler: mockApiHandler,
autoCondenseContext: false,
autoCondenseContextPercent: 100,
systemPrompt: "System prompt",
})
expect(result2.messages).not.toEqual(messagesWithSmallContent)
@@ -703,6 +822,8 @@ describe("getMaxTokens", () => {
contextWindow: modelInfo.contextWindow,
maxTokens: modelInfo.maxTokens,
apiHandler: mockApiHandler,
autoCondenseContext: false,
autoCondenseContextPercent: 100,
systemPrompt: "System prompt",
})
expect(result1.messages).toEqual(messagesWithSmallContent)
@@ -714,6 +835,8 @@ describe("getMaxTokens", () => {
contextWindow: modelInfo.contextWindow,
maxTokens: modelInfo.maxTokens,
apiHandler: mockApiHandler,
autoCondenseContext: false,
autoCondenseContextPercent: 100,
systemPrompt: "System prompt",
})
expect(result2).not.toEqual(messagesWithSmallContent)
@@ -735,6 +858,8 @@ describe("getMaxTokens", () => {
contextWindow: modelInfo.contextWindow,
maxTokens: modelInfo.maxTokens,
apiHandler: mockApiHandler,
autoCondenseContext: false,
autoCondenseContextPercent: 100,
systemPrompt: "System prompt",
})
expect(result1.messages).toEqual(messagesWithSmallContent)
@@ -746,6 +871,8 @@ describe("getMaxTokens", () => {
contextWindow: modelInfo.contextWindow,
maxTokens: modelInfo.maxTokens,
apiHandler: mockApiHandler,
autoCondenseContext: false,
autoCondenseContextPercent: 100,
systemPrompt: "System prompt",
})
expect(result2).not.toEqual(messagesWithSmallContent)
31 changes: 20 additions & 11 deletions src/core/sliding-window/index.ts
@@ -63,7 +63,8 @@ type TruncateOptions = {
contextWindow: number
maxTokens?: number | null
apiHandler: ApiHandler
autoCondenseContext?: boolean
autoCondenseContext: boolean
autoCondenseContextPercent: number
systemPrompt: string
}

@@ -83,6 +84,7 @@ export async function truncateConversationIfNeeded({
maxTokens,
apiHandler,
autoCondenseContext,
autoCondenseContextPercent,
systemPrompt,
}: TruncateOptions): Promise<TruncateResponse> {
// Calculate the maximum tokens reserved for response
@@ -96,21 +98,28 @@
: await estimateTokenCount([{ type: "text", text: lastMessageContent as string }], apiHandler)

// Calculate total effective tokens (totalTokens never includes the last message)
const effectiveTokens = totalTokens + lastMessageTokens
const prevContextTokens = totalTokens + lastMessageTokens

// Calculate available tokens for conversation history
// Truncate if we're within TOKEN_BUFFER_PERCENTAGE of the context window
const allowedTokens = contextWindow * (1 - TOKEN_BUFFER_PERCENTAGE) - reservedTokens

// Determine if truncation is needed and apply if necessary
if (effectiveTokens <= allowedTokens) {
return { messages, summary: "", cost: 0, prevContextTokens: effectiveTokens }
} else if (autoCondenseContext) {
const result = await summarizeConversation(messages, apiHandler, systemPrompt)
if (result.summary) {
return { ...result, prevContextTokens: effectiveTokens }
if (autoCondenseContext) {
const contextPercent = (100 * prevContextTokens) / contextWindow
if (contextPercent >= autoCondenseContextPercent || prevContextTokens > allowedTokens) {
// Attempt to intelligently condense the context
const result = await summarizeConversation(messages, apiHandler, systemPrompt)
if (result.summary) {
return { ...result, prevContextTokens }
}
}
}
const truncatedMessages = truncateConversation(messages, 0.5)
return { messages: truncatedMessages, prevContextTokens: effectiveTokens, summary: "", cost: 0 }

// Fall back to sliding window truncation if needed
if (prevContextTokens > allowedTokens) {
const truncatedMessages = truncateConversation(messages, 0.5)
return { messages: truncatedMessages, prevContextTokens, summary: "", cost: 0 }
}
// No truncation or condensation needed
return { messages, summary: "", cost: 0, prevContextTokens }
}
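
A hedged usage sketch of the updated entry point, using the same numbers as the new threshold test above. `messages` and `stubApiHandler` are assumed to exist (the tests use `mockApiHandler` in the same role); this is an illustration, not code from the PR:

```ts
const result = await truncateConversationIfNeeded({
	messages,
	totalTokens: 60_000, // 60% of the context window
	contextWindow: 100_000,
	maxTokens: 30_000,
	apiHandler: stubApiHandler,
	autoCondenseContext: true,
	autoCondenseContextPercent: 50, // 60% >= 50%, so condensing is attempted first
	systemPrompt: "System prompt",
})
// result.summary is non-empty when summarizeConversation succeeds; otherwise
// the function falls back to sliding-window truncation, or returns the
// messages unchanged when prevContextTokens <= allowedTokens.
```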
11 changes: 9 additions & 2 deletions src/core/task/Task.ts
@@ -1460,8 +1460,14 @@ export class Task extends EventEmitter<ClineEvents> {
}

public async *attemptApiRequest(retryAttempt: number = 0): ApiStream {
const { apiConfiguration, autoApprovalEnabled, alwaysApproveResubmit, requestDelaySeconds, experiments } =
(await this.providerRef.deref()?.getState()) ?? {}
const {
apiConfiguration,
autoApprovalEnabled,
alwaysApproveResubmit,
requestDelaySeconds,
experiments,
autoCondenseContextPercent = 100,
} = (await this.providerRef.deref()?.getState()) ?? {}

let rateLimitDelay = 0

@@ -1510,6 +1516,7 @@
contextWindow,
apiHandler: this.api,
autoCondenseContext,
autoCondenseContextPercent,
systemPrompt,
})
if (truncateResult.messages !== this.apiConversationHistory) {
3 changes: 3 additions & 0 deletions src/core/webview/ClineProvider.ts
@@ -1222,6 +1222,7 @@ export class ClineProvider extends EventEmitter<ClineProviderEvents> implements
alwaysAllowModeSwitch,
alwaysAllowSubtasks,
allowedMaxRequests,
autoCondenseContextPercent,
soundEnabled,
ttsEnabled,
ttsSpeed,
@@ -1293,6 +1294,7 @@ export class ClineProvider extends EventEmitter<ClineProviderEvents> implements
alwaysAllowModeSwitch: alwaysAllowModeSwitch ?? false,
alwaysAllowSubtasks: alwaysAllowSubtasks ?? false,
allowedMaxRequests: allowedMaxRequests ?? Infinity,
autoCondenseContextPercent: autoCondenseContextPercent ?? 100,
uriScheme: vscode.env.uriScheme,
currentTaskItem: this.getCurrentCline()?.taskId
? (taskHistory || []).find((item: HistoryItem) => item.id === this.getCurrentCline()?.taskId)
@@ -1396,6 +1398,7 @@ export class ClineProvider extends EventEmitter<ClineProviderEvents> implements
alwaysAllowModeSwitch: stateValues.alwaysAllowModeSwitch ?? false,
alwaysAllowSubtasks: stateValues.alwaysAllowSubtasks ?? false,
allowedMaxRequests: stateValues.allowedMaxRequests ?? Infinity,
autoCondenseContextPercent: stateValues.autoCondenseContextPercent ?? 100,
taskHistory: stateValues.taskHistory,
allowedCommands: stateValues.allowedCommands,
soundEnabled: stateValues.soundEnabled ?? false,