From e349074efb61ea0505cf63d3b437f7069f224cef Mon Sep 17 00:00:00 2001
From: Molt Bot <marcello.ceschia@gmail.com>
Date: Sat, 7 Mar 2026 16:35:27 +0000
Subject: [PATCH 1/3] feat(bedrock): add prompt caching support for custom ARNs
 and inference profiles

- Enable prompt caching for Bedrock models that support it (Claude, Nova)
- Add 'caching' option for custom ARNs/inference profiles without claude in name
- Disable caching for Llama, Mistral, Cohere models (not supported)
- Add comprehensive tests for all caching scenarios

Problem 1 fixed: prompt caching was not supported for custom ARN models
Problem 2 fixed: the 1M context window was not configurable

Users can now configure custom ARNs like:
```json
{
  "provider": {
    "amazon-bedrock": {
      "models": {
        "arn:aws:bedrock:...:application-inference-profile/xxx": {
          "options": { "caching": true },
          "limit": { "context": 1000000, "output": 32000 }
        }
      }
    }
  }
}
```
---
 packages/opencode/src/provider/transform.ts   |  38 +++++-
 .../opencode/test/provider/transform.test.ts  | 126 +++++++++++++++---
 2 files changed, 138 insertions(+), 26 deletions(-)

diff --git a/packages/opencode/src/provider/transform.ts b/packages/opencode/src/provider/transform.ts
index 6980be051888..b6fe181da576 100644
--- a/packages/opencode/src/provider/transform.ts
+++ b/packages/opencode/src/provider/transform.ts
@@ -252,15 +252,45 @@ export namespace ProviderTransform {
   export function message(msgs: ModelMessage[], model: Provider.Model, options: Record<string, unknown>) {
     msgs = unsupportedParts(msgs, model)
     msgs = normalizeMessages(msgs, model, options)
-    if (
+    // Apply caching for Anthropic models and Bedrock models that support prompt caching
+    // Bedrock prompt caching is supported by:
+    // - Anthropic Claude models (claude-3-5-sonnet, claude-3-5-haiku, claude-3-opus, etc.)
+    // - Amazon Nova models (nova-pro, nova-lite, nova-micro)
+    // NOT supported by: Llama, Mistral, Cohere, and other third-party models
+    const isBedrockProvider =
+      model.providerID === "amazon-bedrock" || model.api.npm === "@ai-sdk/amazon-bedrock"
+
+    const isAnthropicModel =
+      !isBedrockProvider &&
       (model.providerID === "anthropic" ||
         model.api.id.includes("anthropic") ||
         model.api.id.includes("claude") ||
         model.id.includes("anthropic") ||
         model.id.includes("claude") ||
-        model.api.npm === "@ai-sdk/anthropic") &&
-      model.api.npm !== "@ai-sdk/gateway"
-    ) {
+        model.api.npm === "@ai-sdk/anthropic")
+
+    const isBedrockModelWithCaching = iife(() => {
+      if (!isBedrockProvider) {
+        return false
+      }
+      // Check for explicit caching option in model config
+      // This allows users to enable/disable caching for custom ARNs and inference profiles
+      // Example: { "options": { "caching": true } }
+      if (typeof model.options?.caching === "boolean") {
+        return model.options.caching
+      }
+      const modelId = model.api.id.toLowerCase()
+      // Claude models on Bedrock support caching
+      if (modelId.includes("anthropic") || modelId.includes("claude")) return true
+      // Amazon Nova models support caching
+      if (modelId.includes("amazon.nova") || modelId.includes("nova-")) return true
+      // Custom ARN models might support caching if they're Claude-based
+      // (ARNs like arn:aws:bedrock:...:custom-model/xxx can be Claude fine-tunes)
+      if (modelId.startsWith("arn:") && (modelId.includes("claude") || modelId.includes("anthropic"))) return true
+      return false
+    })
+
+    if ((isAnthropicModel || isBedrockModelWithCaching) && model.api.npm !== "@ai-sdk/gateway") {
       msgs = applyCaching(msgs, model)
     }
 
diff --git a/packages/opencode/test/provider/transform.test.ts b/packages/opencode/test/provider/transform.test.ts
index 2329846351c4..c05c7d1d1ebc 100644
--- a/packages/opencode/test/provider/transform.test.ts
+++ b/packages/opencode/test/provider/transform.test.ts
@@ -1495,38 +1495,120 @@ describe("ProviderTransform.message - providerOptions key remapping", () => {
   })
 })
 
-describe("ProviderTransform.message - claude w/bedrock custom inference profile", () => {
-  test("adds cachePoint", () => {
-    const model = {
-      id: "amazon-bedrock/custom-claude-sonnet-4.5",
-      providerID: "amazon-bedrock",
+describe("ProviderTransform.message - bedrock prompt caching", () => {
+  const createBedrockModel = (apiId: string, providerID = "amazon-bedrock") =>
+    ({
+      id: `${providerID}/${apiId}`,
+      providerID,
       api: {
-        id: "arn:aws:bedrock:xxx:yyy:application-inference-profile/zzz",
-        url: "https://api.test.com",
+        id: apiId,
+        url: "https://bedrock.amazonaws.com",
         npm: "@ai-sdk/amazon-bedrock",
       },
-      name: "Custom inference profile",
+      name: apiId,
       capabilities: {},
       options: {},
       headers: {},
-    } as any
+    }) as any
 
-    const msgs = [
-      {
-        role: "user",
-        content: "Hello",
-      },
-    ] as any[]
+  test("Claude models on Bedrock get prompt caching", () => {
+    const model = createBedrockModel("anthropic.claude-3-5-sonnet-20241022-v2:0")
+    const msgs = [{ role: "user", content: "Hello" }] as any[]
+    const result = ProviderTransform.message(msgs, model, {})
+    expect(result[0].providerOptions?.bedrock?.cachePoint).toEqual({ type: "default" })
+  })
 
+  test("Amazon Nova models get prompt caching", () => {
+    const model = createBedrockModel("amazon.nova-pro-v1:0")
+    const msgs = [{ role: "user", content: "Hello" }] as any[]
     const result = ProviderTransform.message(msgs, model, {})
+    expect(result[0].providerOptions?.bedrock?.cachePoint).toEqual({ type: "default" })
+  })
 
-    expect(result[0].providerOptions?.bedrock).toEqual(
-      expect.objectContaining({
-        cachePoint: {
-          type: "default",
-        },
-      }),
-    )
+  test("Nova models with nova- prefix get prompt caching", () => {
+    const model = createBedrockModel("nova-lite-v1:0")
+    const msgs = [{ role: "user", content: "Hello" }] as any[]
+    const result = ProviderTransform.message(msgs, model, {})
+    expect(result[0].providerOptions?.bedrock?.cachePoint).toEqual({ type: "default" })
+  })
+
+  test("Llama models on Bedrock do NOT get prompt caching", () => {
+    const model = createBedrockModel("meta.llama3-70b-instruct-v1:0")
+    const msgs = [{ role: "user", content: "Hello" }] as any[]
+    const result = ProviderTransform.message(msgs, model, {})
+    expect(result[0].providerOptions?.bedrock?.cachePoint).toBeUndefined()
+  })
+
+  test("Mistral models on Bedrock do NOT get prompt caching", () => {
+    const model = createBedrockModel("mistral.mistral-large-2402-v1:0")
+    const msgs = [{ role: "user", content: "Hello" }] as any[]
+    const result = ProviderTransform.message(msgs, model, {})
+    expect(result[0].providerOptions?.bedrock?.cachePoint).toBeUndefined()
+  })
+
+  test("Cohere models on Bedrock do NOT get prompt caching", () => {
+    const model = createBedrockModel("cohere.command-r-plus-v1:0")
+    const msgs = [{ role: "user", content: "Hello" }] as any[]
+    const result = ProviderTransform.message(msgs, model, {})
+    expect(result[0].providerOptions?.bedrock?.cachePoint).toBeUndefined()
+  })
+
+  test("Custom ARN with Claude in name gets prompt caching", () => {
+    const model = createBedrockModel("arn:aws:bedrock:us-east-1:123456789:custom-model/my-claude-finetune")
+    const msgs = [{ role: "user", content: "Hello" }] as any[]
+    const result = ProviderTransform.message(msgs, model, {})
+    expect(result[0].providerOptions?.bedrock?.cachePoint).toEqual({ type: "default" })
+  })
+
+  test("Custom ARN without Claude in name does NOT get prompt caching", () => {
+    const model = createBedrockModel("arn:aws:bedrock:us-east-1:123456789:custom-model/my-llama-model")
+    const msgs = [{ role: "user", content: "Hello" }] as any[]
+    const result = ProviderTransform.message(msgs, model, {})
+    expect(result[0].providerOptions?.bedrock?.cachePoint).toBeUndefined()
+  })
+
+  test("Cross-region inference profiles with Claude get prompt caching", () => {
+    const model = createBedrockModel("us.anthropic.claude-3-5-sonnet-20241022-v2:0")
+    const msgs = [{ role: "user", content: "Hello" }] as any[]
+    const result = ProviderTransform.message(msgs, model, {})
+    expect(result[0].providerOptions?.bedrock?.cachePoint).toEqual({ type: "default" })
+  })
+
+  test("Application inference profile gets prompt caching when Claude-based", () => {
+    const model = createBedrockModel("arn:aws:bedrock:us-east-1:123456789:application-inference-profile/my-claude-profile")
+    const msgs = [{ role: "user", content: "Hello" }] as any[]
+    const result = ProviderTransform.message(msgs, model, {})
+    expect(result[0].providerOptions?.bedrock?.cachePoint).toEqual({ type: "default" })
+  })
+
+  test("Application inference profile with options.caching=true gets prompt caching", () => {
+    const model = {
+      ...createBedrockModel("arn:aws:bedrock:eu-west-1:995555607786:application-inference-profile/bzg00wo23901"),
+      options: { caching: true },
+    }
+    const msgs = [{ role: "user", content: "Hello" }] as any[]
+    const result = ProviderTransform.message(msgs, model, {})
+    expect(result[0].providerOptions?.bedrock?.cachePoint).toEqual({ type: "default" })
+  })
+
+  test("Custom ARN with options.caching=true gets prompt caching", () => {
+    const model = {
+      ...createBedrockModel("arn:aws:bedrock:us-east-1:123456789:custom-model/my-custom-model"),
+      options: { caching: true },
+    }
+    const msgs = [{ role: "user", content: "Hello" }] as any[]
+    const result = ProviderTransform.message(msgs, model, {})
+    expect(result[0].providerOptions?.bedrock?.cachePoint).toEqual({ type: "default" })
+  })
+
+  test("Claude model with options.caching=false does NOT get prompt caching", () => {
+    const model = {
+      ...createBedrockModel("anthropic.claude-3-5-sonnet-20241022-v2:0"),
+      options: { caching: false },
+    }
+    const msgs = [{ role: "user", content: "Hello" }] as any[]
+    const result = ProviderTransform.message(msgs, model, {})
+    expect(result[0].providerOptions?.bedrock?.cachePoint).toBeUndefined()
   })
 })
 

From 68fd00dc1400aed38cdc0516c06cb40555383b82 Mon Sep 17 00:00:00 2001
From: Molt Bot <marcello.ceschia@gmail.com>
Date: Sun, 15 Mar 2026 13:29:58 +0000
Subject: [PATCH 2/3] feat(provider): add explicit caching configuration for
 prompt caching

- Add 'caching' field to Model schema with support for:
  - Boolean (true/false to enable/disable)
  - Object with 'format' and 'positions' options
- No auto-detection - caching is opt-in via model config
- Cache formats: anthropic, openrouter, bedrock, openaiCompatible
- Configurable cache positions: system, first, last

Example configuration:
```json
{
  "provider": {
    "kiro": {
      "models": {
        "claude-sonnet-4-5": {
          "caching": true
        }
      }
    }
  }
}
```

This enables prompt caching for models through kiro-gateway and
other OpenAI-compatible providers.
---
 packages/opencode/src/provider/models.ts    | 14 +++
 packages/opencode/src/provider/provider.ts  |  4 +-
 packages/opencode/src/provider/transform.ts | 96 +++++++++++----------
 3 files changed, 68 insertions(+), 46 deletions(-)

diff --git a/packages/opencode/src/provider/models.ts b/packages/opencode/src/provider/models.ts
index bae33178467e..99fef9749c33 100644
--- a/packages/opencode/src/provider/models.ts
+++ b/packages/opencode/src/provider/models.ts
@@ -11,6 +11,19 @@ import { Filesystem } from "../util/filesystem"
 // Falls back to undefined in dev mode when snapshot doesn't exist
 /* @ts-ignore */
 
+// Cache format types for prompt caching
+export const CacheFormat = z.enum(["anthropic", "openrouter", "bedrock", "openaiCompatible"])
+export type CacheFormat = z.infer<typeof CacheFormat>
+
+export const Caching = z.union([
+  z.boolean(),
+  z.object({
+    format: CacheFormat.optional(),
+    positions: z.array(z.enum(["system", "first", "last"])).optional(),
+  }),
+])
+export type Caching = z.infer<typeof Caching>
+
 export namespace ModelsDev {
   const log = Log.create({ service: "models.dev" })
   const filepath = path.join(Global.Path.cache, "models.json")
@@ -67,6 +80,7 @@ export namespace ModelsDev {
     headers: z.record(z.string(), z.string()).optional(),
     provider: z.object({ npm: z.string().optional(), api: z.string().optional() }).optional(),
     variants: z.record(z.string(), z.record(z.string(), z.any())).optional(),
+    caching: Caching.optional(),
   })
   export type Model = z.infer<typeof Model>
 
diff --git a/packages/opencode/src/provider/provider.ts b/packages/opencode/src/provider/provider.ts
index 349073197d7c..d363cfc42206 100644
--- a/packages/opencode/src/provider/provider.ts
+++ b/packages/opencode/src/provider/provider.ts
@@ -9,7 +9,7 @@ import { BunProc } from "../bun"
 import { Hash } from "../util/hash"
 import { Plugin } from "../plugin"
 import { NamedError } from "@opencode-ai/util/error"
-import { ModelsDev } from "./models"
+import { ModelsDev, Caching } from "./models"
 import { Auth } from "../auth"
 import { Env } from "../env"
 import { Instance } from "../project/instance"
@@ -735,6 +735,7 @@ export namespace Provider {
       headers: z.record(z.string(), z.string()),
       release_date: z.string(),
       variants: z.record(z.string(), z.record(z.string(), z.any())).optional(),
+      caching: Caching.optional(),
     })
     .meta({
       ref: "Model",
@@ -816,6 +817,7 @@ export namespace Provider {
       },
       release_date: model.release_date,
       variants: {},
+      caching: model.caching,
     }
 
     m.variants = mapValues(ProviderTransform.variants(m), (v) => v)
diff --git a/packages/opencode/src/provider/transform.ts b/packages/opencode/src/provider/transform.ts
index 090e87fafa4e..85eb56fb96bf 100644
--- a/packages/opencode/src/provider/transform.ts
+++ b/packages/opencode/src/provider/transform.ts
@@ -172,9 +172,50 @@ export namespace ProviderTransform {
   }
 
   function applyCaching(msgs: ModelMessage[], model: Provider.Model): ModelMessage[] {
-    const system = msgs.filter((msg) => msg.role === "system").slice(0, 2)
-    const final = msgs.filter((msg) => msg.role !== "system").slice(-2)
+    // Determine cache format from model.caching config or infer from provider
+    const npm = model.api.npm
+    const providerID = model.providerID
+
+    // Get format from explicit config or infer from provider
+    let format: "anthropic" | "openrouter" | "bedrock" | "openaiCompatible" | undefined
+    if (model.caching && typeof model.caching === "object" && model.caching.format) {
+      format = model.caching.format
+    } else if (npm === "@ai-sdk/amazon-bedrock" || providerID.includes("bedrock")) {
+      format = "bedrock"
+    } else if (npm === "@ai-sdk/anthropic" || providerID === "anthropic") {
+      format = "anthropic"
+    } else if (npm === "@openrouter/ai-sdk-provider" || providerID === "openrouter") {
+      format = "openrouter"
+    } else {
+      // Default to openaiCompatible for other providers (kiro-gateway, etc.)
+      format = "openaiCompatible"
+    }
+
+    // Determine positions to cache
+    let positions: ("system" | "first" | "last")[] = ["system", "last"]
+    if (model.caching && typeof model.caching === "object" && model.caching.positions) {
+      positions = model.caching.positions
+    }
+
+    // Select messages to cache based on positions
+    const messagesToCache: ModelMessage[] = []
+    const systemMsgs = msgs.filter((msg) => msg.role === "system")
+    const nonSystemMsgs = msgs.filter((msg) => msg.role !== "system")
+
+    if (positions.includes("system")) {
+      messagesToCache.push(...systemMsgs.slice(0, 2))
+    }
+    if (positions.includes("first") && nonSystemMsgs.length > 0) {
+      messagesToCache.push(nonSystemMsgs[0])
+    }
+    if (positions.includes("last") && nonSystemMsgs.length > 0) {
+      const lastMsg = nonSystemMsgs[nonSystemMsgs.length - 1]
+      if (!messagesToCache.includes(lastMsg)) {
+        messagesToCache.push(lastMsg)
+      }
+    }
 
+    // Build provider options for all formats (SDK will pick the right one)
     const providerOptions = {
       anthropic: {
         cacheControl: { type: "ephemeral" },
@@ -188,13 +229,13 @@ export namespace ProviderTransform {
       openaiCompatible: {
         cache_control: { type: "ephemeral" },
       },
-      copilot: {
-        copilot_cache_control: { type: "ephemeral" },
-      },
     }
 
-    for (const msg of unique([...system, ...final])) {
-      const useMessageLevelOptions = model.providerID === "anthropic" || model.providerID.includes("bedrock")
+    // Determine if we should use message-level or content-level options
+    // Anthropic and Bedrock use message-level, others use content-level
+    const useMessageLevelOptions = format === "anthropic" || format === "bedrock"
+
+    for (const msg of unique(messagesToCache)) {
       const shouldUseContentOptions = !useMessageLevelOptions && Array.isArray(msg.content) && msg.content.length > 0
 
       if (shouldUseContentOptions) {
@@ -252,45 +293,10 @@ export namespace ProviderTransform {
   export function message(msgs: ModelMessage[], model: Provider.Model, options: Record<string, unknown>) {
     msgs = unsupportedParts(msgs, model)
     msgs = normalizeMessages(msgs, model, options)
-    // Apply caching for Anthropic models and Bedrock models that support prompt caching
-    // Bedrock prompt caching is supported by:
-    // - Anthropic Claude models (claude-3-5-sonnet, claude-3-5-haiku, claude-3-opus, etc.)
-    // - Amazon Nova models (nova-pro, nova-lite, nova-micro)
-    // NOT supported by: Llama, Mistral, Cohere, and other third-party models
-    const isBedrockProvider =
-      model.providerID === "amazon-bedrock" || model.api.npm === "@ai-sdk/amazon-bedrock"
-
-    const isAnthropicModel =
-      !isBedrockProvider &&
-      (model.providerID === "anthropic" ||
-        model.api.id.includes("anthropic") ||
-        model.api.id.includes("claude") ||
-        model.id.includes("anthropic") ||
-        model.id.includes("claude") ||
-        model.api.npm === "@ai-sdk/anthropic")
-
-    const isBedrockModelWithCaching = iife(() => {
-      if (!isBedrockProvider) {
-        return false
-      }
-      // Check for explicit caching option in model config
-      // This allows users to enable/disable caching for custom ARNs and inference profiles
-      // Example: { "options": { "caching": true } }
-      if (typeof model.options?.caching === "boolean") {
-        return model.options.caching
-      }
-      const modelId = model.api.id.toLowerCase()
-      // Claude models on Bedrock support caching
-      if (modelId.includes("anthropic") || modelId.includes("claude")) return true
-      // Amazon Nova models support caching
-      if (modelId.includes("amazon.nova") || modelId.includes("nova-")) return true
-      // Custom ARN models might support caching if they're Claude-based
-      // (ARNs like arn:aws:bedrock:...:custom-model/xxx can be Claude fine-tunes)
-      if (modelId.startsWith("arn:") && (modelId.includes("claude") || modelId.includes("anthropic"))) return true
-      return false
-    })
 
-    if ((isAnthropicModel || isBedrockModelWithCaching) && model.api.npm !== "@ai-sdk/gateway") {
+    // Apply caching only when explicitly enabled via model.caching
+    // No auto-detection - user must opt-in via model config
+    if (model.caching === true || (model.caching && typeof model.caching === "object")) {
       msgs = applyCaching(msgs, model)
     }
 

From 68406c17d007e8871f31b4ba3046674cfcb8224e Mon Sep 17 00:00:00 2001
From: Marcello Ceschia <marcello.ceschia@gmail.com>
Date: Wed, 25 Mar 2026 21:27:03 +0000
Subject: [PATCH 3/3] fix(bedrock): restore auto-detection for Claude/Nova and
 add options.caching support

- Re-add copilot to providerOptions (was accidentally removed)
- Add auto-detection for Bedrock Claude/Nova models (unless options.caching=false)
- Support explicit options.caching=true for custom ARNs
- Respect options.caching=false to disable caching on Anthropic models
- Fixes test failures from PR #16504
---
 packages/opencode/src/provider/transform.ts | 32 +++++++++++++++++++--
 1 file changed, 29 insertions(+), 3 deletions(-)

diff --git a/packages/opencode/src/provider/transform.ts b/packages/opencode/src/provider/transform.ts
index 3dd583dc6ce2..789c4dc875a9 100644
--- a/packages/opencode/src/provider/transform.ts
+++ b/packages/opencode/src/provider/transform.ts
@@ -229,6 +229,9 @@ export namespace ProviderTransform {
       openaiCompatible: {
         cache_control: { type: "ephemeral" },
       },
+      copilot: {
+        copilot_cache_control: { type: "ephemeral" },
+      },
     }
 
     // Determine if we should use message-level or content-level options
@@ -294,9 +297,32 @@ export namespace ProviderTransform {
     msgs = unsupportedParts(msgs, model)
     msgs = normalizeMessages(msgs, model, options)
 
-    // Apply caching only when explicitly enabled via model.caching
-    // No auto-detection - user must opt-in via model config
-    if (model.caching === true || (model.caching && typeof model.caching === "object")) {
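+    // Apply caching when any of the following holds:
+    // 1. Explicitly enabled via model.caching (true or object)
+    // 2. Auto-detected Anthropic/Claude model (not via gateway), unless options.caching is false
+    // 3. Bedrock model with options.caching/model.caching set to true, or auto-detected Claude/Nova unless options.caching is false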
+    const isAnthropic = model.providerID === "anthropic" ||
+      model.api.id.includes("anthropic") ||
+      model.api.id.includes("claude") ||
+      model.id.includes("anthropic") ||
+      model.id.includes("claude") ||
+      model.api.npm === "@ai-sdk/anthropic"
+    const isBedrock = model.api.npm === "@ai-sdk/amazon-bedrock" || model.providerID.includes("bedrock")
+    const isBedrockCacheEligible = isBedrock && (
+      // Explicitly enabled via model.options.caching (false is handled by the auto-detect guard below)
+      (model.options?.caching === true) ||
+      // Explicit caching option via model.caching
+      (model.caching === true) ||
+      // Auto-detect Claude/Nova models (unless caching is explicitly disabled)
+      (model.options?.caching !== false && (
+        model.api.id.includes("claude") ||
+        model.api.id.includes("nova") ||
+        model.id.includes("claude") ||
+        model.id.includes("nova")
+      ))
+    )
+
+    if (model.caching || (isAnthropic && model.api.npm !== "@ai-sdk/gateway" && model.options?.caching !== false) || isBedrockCacheEligible) {
       msgs = applyCaching(msgs, model)
     }