From 29c9b7627d091c389d112434180ab065f97df61c Mon Sep 17 00:00:00 2001
From: Matt Rubens
Date: Tue, 15 Jul 2025 00:23:05 -0400
Subject: [PATCH 1/3] Make token logic for condensing consistent with task
 header

---
 src/core/task/Task.ts | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/src/core/task/Task.ts b/src/core/task/Task.ts
index c8553a8fc67..aa0590fedd9 100644
--- a/src/core/task/Task.ts
+++ b/src/core/task/Task.ts
@@ -41,6 +41,7 @@ import { ClineAskResponse } from "../../shared/WebviewMessage"
 import { defaultModeSlug } from "../../shared/modes"
 import { DiffStrategy } from "../../shared/tools"
 import { EXPERIMENT_IDS, experiments } from "../../shared/experiments"
+import { getModelMaxOutputTokens } from "../../shared/api"
 
 // services
 import { UrlContentFetcher } from "../../services/browser/UrlContentFetcher"
@@ -1716,15 +1717,13 @@ export class Task extends EventEmitter<ClineEvents> {
 		const { contextTokens } = this.getTokenUsage()
 
 		if (contextTokens) {
-			// Default max tokens value for thinking models when no specific
-			// value is set.
-			const DEFAULT_THINKING_MODEL_MAX_TOKENS = 16_384
-
 			const modelInfo = this.api.getModel().info
-			const maxTokens = modelInfo.supportsReasoningBudget
-				? this.apiConfiguration.modelMaxTokens || DEFAULT_THINKING_MODEL_MAX_TOKENS
-				: modelInfo.maxTokens
+			const maxTokens = getModelMaxOutputTokens({
+				modelId: this.api.getModel().id,
+				model: modelInfo,
+				settings: this.apiConfiguration,
+			})
 
 			const contextWindow = modelInfo.contextWindow

From f9573986a5a695f13a0e735ac3434cb3968c5843 Mon Sep 17 00:00:00 2001
From: Matt Rubens
Date: Tue, 15 Jul 2025 00:23:45 -0400
Subject: [PATCH 2/3] For models where context window is equal to max tokens,
 use 20% of the context for output

---
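Notes for reviewers: a minimal standalone sketch of the new fallback, for
illustration only -- ModelLike and maxOutputTokens are hypothetical names,
not identifiers from the codebase:

    interface ModelLike {
        maxTokens?: number
        contextWindow: number
    }

    function maxOutputTokens(model: ModelLike): number {
        // Trust the declared limit only when it is set, non-zero, and not
        // simply the entire context window.
        if (model.maxTokens && model.maxTokens !== model.contextWindow) {
            return model.maxTokens
        }
        // Otherwise reserve 20% of the context window for output.
        return Math.ceil(model.contextWindow * 0.2)
    }

    maxOutputTokens({ maxTokens: 8192, contextWindow: 131072 })   // => 8192
    maxOutputTokens({ maxTokens: 131072, contextWindow: 131072 }) // => 26215
    maxOutputTokens({ contextWindow: 131072 })                    // => 26215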
 src/shared/api.ts | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/src/shared/api.ts b/src/shared/api.ts
index a6199c81aac..a1603fc7761 100644
--- a/src/shared/api.ts
+++ b/src/shared/api.ts
@@ -82,9 +82,12 @@ export const getModelMaxOutputTokens = ({
 		return ANTHROPIC_DEFAULT_MAX_TOKENS
 	}
 
-	// If maxTokens is 0 or undefined, fall back to 20% of context window
-	// This matches the sliding window logic
-	return model.maxTokens || Math.ceil(model.contextWindow * 0.2)
+	// If maxTokens is 0, undefined, or equal to the full context window, fall back to 20% of the context window
+	if (model.maxTokens && model.maxTokens !== model.contextWindow) {
+		return model.maxTokens
+	} else {
+		return Math.ceil(model.contextWindow * 0.2)
+	}
 }
 
 // GetModelsOptions

From d28587bc317794b6c7531ee146c4569cee336a8b Mon Sep 17 00:00:00 2001
From: Matt Rubens
Date: Tue, 15 Jul 2025 00:25:26 -0400
Subject: [PATCH 3/3] Add Kimi K2 to Groq

---
 packages/types/src/providers/groq.ts | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/packages/types/src/providers/groq.ts b/packages/types/src/providers/groq.ts
index 49667e357ef..a3fc284bb50 100644
--- a/packages/types/src/providers/groq.ts
+++ b/packages/types/src/providers/groq.ts
@@ -10,6 +10,7 @@ export type GroqModelId =
 	| "qwen-qwq-32b"
 	| "qwen/qwen3-32b"
 	| "deepseek-r1-distill-llama-70b"
+	| "moonshotai/kimi-k2-instruct"
 
 export const groqDefaultModelId: GroqModelId = "llama-3.3-70b-versatile" // Defaulting to Llama3 70B Versatile
 
@@ -87,4 +88,13 @@ export const groqModels = {
 		outputPrice: 0.99,
 		description: "DeepSeek R1 Distill Llama 70B model, 128K context.",
 	},
+	"moonshotai/kimi-k2-instruct": {
+		maxTokens: 131072,
+		contextWindow: 131072,
+		supportsImages: false,
+		supportsPromptCache: false,
+		inputPrice: 1.0,
+		outputPrice: 3.0,
+		description: "Moonshot AI Kimi K2 Instruct 1T model, 128K context.",
+	},
 } as const satisfies Record<GroqModelId, ModelInfo>
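
Series note: with patches 2/3 and 3/3 combined, Kimi K2 on Groq reports
maxTokens equal to its context window (131072), so getModelMaxOutputTokens
falls back to Math.ceil(131072 * 0.2) = 26215 output tokens, leaving roughly
80% of the window for input context; the condensing check in patch 1/3 picks
up the same value through the shared helper.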