From 29c9b7627d091c389d112434180ab065f97df61c Mon Sep 17 00:00:00 2001
From: Matt Rubens
Date: Tue, 15 Jul 2025 00:23:05 -0400
Subject: [PATCH 1/3] Make token logic for condensing consistent with task
 header

---
 src/core/task/Task.ts | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/src/core/task/Task.ts b/src/core/task/Task.ts
index c8553a8fc67..aa0590fedd9 100644
--- a/src/core/task/Task.ts
+++ b/src/core/task/Task.ts
@@ -41,6 +41,7 @@ import { ClineAskResponse } from "../../shared/WebviewMessage"
 import { defaultModeSlug } from "../../shared/modes"
 import { DiffStrategy } from "../../shared/tools"
 import { EXPERIMENT_IDS, experiments } from "../../shared/experiments"
+import { getModelMaxOutputTokens } from "../../shared/api"
 
 // services
 import { UrlContentFetcher } from "../../services/browser/UrlContentFetcher"
@@ -1716,15 +1717,13 @@ export class Task extends EventEmitter<ClineEvents> {
 		const { contextTokens } = this.getTokenUsage()
 
 		if (contextTokens) {
-			// Default max tokens value for thinking models when no specific
-			// value is set.
-			const DEFAULT_THINKING_MODEL_MAX_TOKENS = 16_384
-
 			const modelInfo = this.api.getModel().info
-			const maxTokens = modelInfo.supportsReasoningBudget
-				? this.apiConfiguration.modelMaxTokens || DEFAULT_THINKING_MODEL_MAX_TOKENS
-				: modelInfo.maxTokens
+			const maxTokens = getModelMaxOutputTokens({
+				modelId: this.api.getModel().id,
+				model: modelInfo,
+				settings: this.apiConfiguration,
+			})
 
 			const contextWindow = modelInfo.contextWindow

From f9573986a5a695f13a0e735ac3434cb3968c5843 Mon Sep 17 00:00:00 2001
From: Matt Rubens
Date: Tue, 15 Jul 2025 00:23:45 -0400
Subject: [PATCH 2/3] For models where context window is equal to max tokens,
 use 20% of the context for output

---
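Notes for reviewers: a minimal standalone sketch of the new fallback, for
illustration only -- ModelLike and maxOutputTokens are hypothetical names,
not identifiers from the codebase:

    interface ModelLike {
        maxTokens?: number
        contextWindow: number
    }

    function maxOutputTokens(model: ModelLike): number {
        // Trust the declared limit only when it is set, non-zero, and not
        // simply the entire context window.
        if (model.maxTokens && model.maxTokens !== model.contextWindow) {
            return model.maxTokens
        }
        // Otherwise reserve 20% of the context window for output.
        return Math.ceil(model.contextWindow * 0.2)
    }

    maxOutputTokens({ maxTokens: 8192, contextWindow: 131072 })   // => 8192
    maxOutputTokens({ maxTokens: 131072, contextWindow: 131072 }) // => 26215
    maxOutputTokens({ contextWindow: 131072 })                    // => 26215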
 src/shared/api.ts | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/src/shared/api.ts b/src/shared/api.ts
index a6199c81aac..a1603fc7761 100644
--- a/src/shared/api.ts
+++ b/src/shared/api.ts
@@ -82,9 +82,12 @@ export const getModelMaxOutputTokens = ({
 		return ANTHROPIC_DEFAULT_MAX_TOKENS
 	}
 
-	// If maxTokens is 0 or undefined, fall back to 20% of context window
-	// This matches the sliding window logic
-	return model.maxTokens || Math.ceil(model.contextWindow * 0.2)
+	// If maxTokens is 0, undefined, or equal to the full context window, fall back to 20% of the context window
+	if (model.maxTokens && model.maxTokens !== model.contextWindow) {
+		return model.maxTokens
+	} else {
+		return Math.ceil(model.contextWindow * 0.2)
+	}
 }
 
 // GetModelsOptions

From d28587bc317794b6c7531ee146c4569cee336a8b Mon Sep 17 00:00:00 2001
From: Matt Rubens
Date: Tue, 15 Jul 2025 00:25:26 -0400
Subject: [PATCH 3/3] Add Kimi K2 to Groq

---
 packages/types/src/providers/groq.ts | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/packages/types/src/providers/groq.ts b/packages/types/src/providers/groq.ts
index 49667e357ef..a3fc284bb50 100644
--- a/packages/types/src/providers/groq.ts
+++ b/packages/types/src/providers/groq.ts
@@ -10,6 +10,7 @@ export type GroqModelId =
 	| "qwen-qwq-32b"
 	| "qwen/qwen3-32b"
 	| "deepseek-r1-distill-llama-70b"
+	| "moonshotai/kimi-k2-instruct"
 
 export const groqDefaultModelId: GroqModelId = "llama-3.3-70b-versatile" // Defaulting to Llama3 70B Versatile
 
@@ -87,4 +88,13 @@ export const groqModels = {
 		outputPrice: 0.99,
 		description: "DeepSeek R1 Distill Llama 70B model, 128K context.",
 	},
+	"moonshotai/kimi-k2-instruct": {
+		maxTokens: 131072,
+		contextWindow: 131072,
+		supportsImages: false,
+		supportsPromptCache: false,
+		inputPrice: 1.0,
+		outputPrice: 3.0,
+		description: "Moonshot AI Kimi K2 Instruct 1T model, 128K context.",
+	},
 } as const satisfies Record<GroqModelId, ModelInfo>
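
Series note: with patches 2/3 and 3/3 combined, Kimi K2 on Groq reports
maxTokens equal to its context window (131072), so getModelMaxOutputTokens
falls back to Math.ceil(131072 * 0.2) = 26215 output tokens, leaving roughly
80% of the window for input context; the condensing check in patch 1/3 picks
up the same value through the shared helper.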