diff --git a/packages/types/src/provider-settings.ts b/packages/types/src/provider-settings.ts index d713a47d6b4..ebb78a6c3c4 100644 --- a/packages/types/src/provider-settings.ts +++ b/packages/types/src/provider-settings.ts @@ -236,6 +236,7 @@ const bedrockSchema = apiModelIdProviderModelSchema.extend({ awsBedrockEndpointEnabled: z.boolean().optional(), awsBedrockEndpoint: z.string().optional(), awsBedrock1MContext: z.boolean().optional(), // Enable 'context-1m-2025-08-07' beta for 1M context window. + awsBedrockServiceTier: z.enum(["STANDARD", "FLEX", "PRIORITY"]).optional(), // AWS Bedrock service tier selection }) const vertexSchema = apiModelIdProviderModelSchema.extend({ diff --git a/packages/types/src/providers/bedrock.ts b/packages/types/src/providers/bedrock.ts index 9eaa656ef11..3cc36dcd04c 100644 --- a/packages/types/src/providers/bedrock.ts +++ b/packages/types/src/providers/bedrock.ts @@ -562,3 +562,31 @@ export const BEDROCK_GLOBAL_INFERENCE_MODEL_IDS = [ "anthropic.claude-haiku-4-5-20251001-v1:0", "anthropic.claude-opus-4-5-20251101-v1:0", ] as const + +// Amazon Bedrock Service Tier types +export type BedrockServiceTier = "STANDARD" | "FLEX" | "PRIORITY" + +// Models that support service tiers based on AWS documentation +// https://docs.aws.amazon.com/bedrock/latest/userguide/service-tiers-inference.html +export const BEDROCK_SERVICE_TIER_MODEL_IDS = [ + // Amazon Nova models + "amazon.nova-lite-v1:0", + "amazon.nova-2-lite-v1:0", + "amazon.nova-pro-v1:0", + "amazon.nova-pro-latency-optimized-v1:0", + // DeepSeek models + "deepseek.r1-v1:0", + // Qwen models + "qwen.qwen3-next-80b-a3b", + "qwen.qwen3-coder-480b-a35b-v1:0", + // OpenAI GPT-OSS models + "openai.gpt-oss-20b-1:0", + "openai.gpt-oss-120b-1:0", +] as const + +// Service tier pricing multipliers +export const BEDROCK_SERVICE_TIER_PRICING = { + STANDARD: 1.0, // Base price + FLEX: 0.5, // 50% discount from standard + PRIORITY: 1.75, // 75% premium over standard +} as const diff --git a/src/api/providers/__tests__/bedrock.spec.ts b/src/api/providers/__tests__/bedrock.spec.ts index cccec3818f4..dd6febcc89a 100644 --- a/src/api/providers/__tests__/bedrock.spec.ts +++ b/src/api/providers/__tests__/bedrock.spec.ts @@ -25,7 +25,7 @@ vi.mock("@aws-sdk/client-bedrock-runtime", () => { import { AwsBedrockHandler } from "../bedrock" import { ConverseStreamCommand, BedrockRuntimeClient } from "@aws-sdk/client-bedrock-runtime" -import { BEDROCK_1M_CONTEXT_MODEL_IDS } from "@roo-code/types" +import { BEDROCK_1M_CONTEXT_MODEL_IDS, BEDROCK_SERVICE_TIER_MODEL_IDS, bedrockModels } from "@roo-code/types" import type { Anthropic } from "@anthropic-ai/sdk" @@ -755,4 +755,245 @@ describe("AwsBedrockHandler", () => { expect(commandArg.modelId).toBe(`us.${BEDROCK_1M_CONTEXT_MODEL_IDS[0]}`) }) }) + + describe("service tier feature", () => { + const supportedModelId = BEDROCK_SERVICE_TIER_MODEL_IDS[0] // amazon.nova-lite-v1:0 + + beforeEach(() => { + mockConverseStreamCommand.mockReset() + }) + + describe("pricing multipliers in getModel()", () => { + it("should apply FLEX tier pricing with 50% discount", () => { + const handler = new AwsBedrockHandler({ + apiModelId: supportedModelId, + awsAccessKey: "test", + awsSecretKey: "test", + awsRegion: "us-east-1", + awsBedrockServiceTier: "FLEX", + }) + + const model = handler.getModel() + const baseModel = bedrockModels[supportedModelId as keyof typeof bedrockModels] as { + inputPrice: number + outputPrice: number + } + + // FLEX tier should apply 0.5 multiplier (50% discount) + expect(model.info.inputPrice).toBe(baseModel.inputPrice * 0.5) + expect(model.info.outputPrice).toBe(baseModel.outputPrice * 0.5) + }) + + it("should apply PRIORITY tier pricing with 75% premium", () => { + const handler = new AwsBedrockHandler({ + apiModelId: supportedModelId, + awsAccessKey: "test", + awsSecretKey: "test", + awsRegion: "us-east-1", + awsBedrockServiceTier: "PRIORITY", + }) + + const model = handler.getModel() + const baseModel = bedrockModels[supportedModelId as keyof typeof bedrockModels] as { + inputPrice: number + outputPrice: number + } + + // PRIORITY tier should apply 1.75 multiplier (75% premium) + expect(model.info.inputPrice).toBe(baseModel.inputPrice * 1.75) + expect(model.info.outputPrice).toBe(baseModel.outputPrice * 1.75) + }) + + it("should not modify pricing for STANDARD tier", () => { + const handler = new AwsBedrockHandler({ + apiModelId: supportedModelId, + awsAccessKey: "test", + awsSecretKey: "test", + awsRegion: "us-east-1", + awsBedrockServiceTier: "STANDARD", + }) + + const model = handler.getModel() + const baseModel = bedrockModels[supportedModelId as keyof typeof bedrockModels] as { + inputPrice: number + outputPrice: number + } + + // STANDARD tier should not modify pricing (1.0 multiplier) + expect(model.info.inputPrice).toBe(baseModel.inputPrice) + expect(model.info.outputPrice).toBe(baseModel.outputPrice) + }) + + it("should not apply service tier pricing for unsupported models", () => { + const unsupportedModelId = "anthropic.claude-3-5-sonnet-20241022-v2:0" + const handler = new AwsBedrockHandler({ + apiModelId: unsupportedModelId, + awsAccessKey: "test", + awsSecretKey: "test", + awsRegion: "us-east-1", + awsBedrockServiceTier: "FLEX", // Try to apply FLEX tier + }) + + const model = handler.getModel() + const baseModel = bedrockModels[unsupportedModelId as keyof typeof bedrockModels] as { + inputPrice: number + outputPrice: number + } + + // Pricing should remain unchanged for unsupported models + expect(model.info.inputPrice).toBe(baseModel.inputPrice) + expect(model.info.outputPrice).toBe(baseModel.outputPrice) + }) + }) + + describe("service_tier parameter in API requests", () => { + it("should include service_tier as top-level parameter for supported models", async () => { + const handler = new AwsBedrockHandler({ + apiModelId: supportedModelId, + awsAccessKey: "test", + awsSecretKey: "test", + awsRegion: "us-east-1", + awsBedrockServiceTier: "PRIORITY", + }) + + const messages: Anthropic.Messages.MessageParam[] = [ + { + role: "user", + content: "Test message", + }, + ] + + const generator = handler.createMessage("", messages) + await generator.next() // Start the generator + + // Verify the command was created with service_tier at top level + // Per AWS documentation, service_tier must be a top-level parameter, not inside additionalModelRequestFields + // https://docs.aws.amazon.com/bedrock/latest/userguide/service-tiers-inference.html + expect(mockConverseStreamCommand).toHaveBeenCalled() + const commandArg = mockConverseStreamCommand.mock.calls[0][0] as any + + // service_tier should be at the top level of the payload + expect(commandArg.service_tier).toBe("PRIORITY") + // service_tier should NOT be in additionalModelRequestFields + if (commandArg.additionalModelRequestFields) { + expect(commandArg.additionalModelRequestFields.service_tier).toBeUndefined() + } + }) + + it("should include service_tier FLEX as top-level parameter", async () => { + const handler = new AwsBedrockHandler({ + apiModelId: supportedModelId, + awsAccessKey: "test", + awsSecretKey: "test", + awsRegion: "us-east-1", + awsBedrockServiceTier: "FLEX", + }) + + const messages: Anthropic.Messages.MessageParam[] = [ + { + role: "user", + content: "Test message", + }, + ] + + const generator = handler.createMessage("", messages) + await generator.next() // Start the generator + + expect(mockConverseStreamCommand).toHaveBeenCalled() + const commandArg = mockConverseStreamCommand.mock.calls[0][0] as any + + // service_tier should be at the top level of the payload + expect(commandArg.service_tier).toBe("FLEX") + // service_tier should NOT be in additionalModelRequestFields + if (commandArg.additionalModelRequestFields) { + expect(commandArg.additionalModelRequestFields.service_tier).toBeUndefined() + } + }) + + it("should NOT include service_tier for unsupported models", async () => { + const unsupportedModelId = "anthropic.claude-3-5-sonnet-20241022-v2:0" + const handler = new AwsBedrockHandler({ + apiModelId: unsupportedModelId, + awsAccessKey: "test", + awsSecretKey: "test", + awsRegion: "us-east-1", + awsBedrockServiceTier: "PRIORITY", // Try to apply PRIORITY tier + }) + + const messages: Anthropic.Messages.MessageParam[] = [ + { + role: "user", + content: "Test message", + }, + ] + + const generator = handler.createMessage("", messages) + await generator.next() // Start the generator + + expect(mockConverseStreamCommand).toHaveBeenCalled() + const commandArg = mockConverseStreamCommand.mock.calls[0][0] as any + + // Service tier should NOT be included for unsupported models (at top level or in additionalModelRequestFields) + expect(commandArg.service_tier).toBeUndefined() + if (commandArg.additionalModelRequestFields) { + expect(commandArg.additionalModelRequestFields.service_tier).toBeUndefined() + } + }) + + it("should NOT include service_tier when not specified", async () => { + const handler = new AwsBedrockHandler({ + apiModelId: supportedModelId, + awsAccessKey: "test", + awsSecretKey: "test", + awsRegion: "us-east-1", + // No awsBedrockServiceTier specified + }) + + const messages: Anthropic.Messages.MessageParam[] = [ + { + role: "user", + content: "Test message", + }, + ] + + const generator = handler.createMessage("", messages) + await generator.next() // Start the generator + + expect(mockConverseStreamCommand).toHaveBeenCalled() + const commandArg = mockConverseStreamCommand.mock.calls[0][0] as any + + // Service tier should NOT be included when not specified (at top level or in additionalModelRequestFields) + expect(commandArg.service_tier).toBeUndefined() + if (commandArg.additionalModelRequestFields) { + expect(commandArg.additionalModelRequestFields.service_tier).toBeUndefined() + } + }) + }) + + describe("service tier with cross-region inference", () => { + it("should apply service tier pricing with cross-region inference prefix", () => { + const handler = new AwsBedrockHandler({ + apiModelId: supportedModelId, + awsAccessKey: "test", + awsSecretKey: "test", + awsRegion: "us-east-1", + awsUseCrossRegionInference: true, + awsBedrockServiceTier: "FLEX", + }) + + const model = handler.getModel() + const baseModel = bedrockModels[supportedModelId as keyof typeof bedrockModels] as { + inputPrice: number + outputPrice: number + } + + // Model ID should have cross-region prefix + expect(model.id).toBe(`us.${supportedModelId}`) + + // FLEX tier pricing should still be applied + expect(model.info.inputPrice).toBe(baseModel.inputPrice * 0.5) + expect(model.info.outputPrice).toBe(baseModel.outputPrice * 0.5) + }) + }) + }) }) diff --git a/src/api/providers/bedrock.ts b/src/api/providers/bedrock.ts index 4a4adfc0f41..51793df2185 100644 --- a/src/api/providers/bedrock.ts +++ b/src/api/providers/bedrock.ts @@ -18,6 +18,7 @@ import { type ModelInfo, type ProviderSettings, type BedrockModelId, + type BedrockServiceTier, bedrockDefaultModelId, bedrockModels, bedrockDefaultPromptRouterModelId, @@ -27,6 +28,8 @@ import { AWS_INFERENCE_PROFILE_MAPPING, BEDROCK_1M_CONTEXT_MODEL_IDS, BEDROCK_GLOBAL_INFERENCE_MODEL_IDS, + BEDROCK_SERVICE_TIER_MODEL_IDS, + BEDROCK_SERVICE_TIER_PRICING, } from "@roo-code/types" import { ApiStream } from "../transform/stream" @@ -74,6 +77,13 @@ interface BedrockPayload { toolConfig?: ToolConfiguration } +// Extended payload type that includes service_tier as a top-level parameter +// AWS Bedrock service tiers (STANDARD, FLEX, PRIORITY) are specified at the top level +// https://docs.aws.amazon.com/bedrock/latest/userguide/service-tiers-inference.html +type BedrockPayloadWithServiceTier = BedrockPayload & { + service_tier?: BedrockServiceTier +} + // Define specific types for content block events to avoid 'as any' usage // These handle the multiple possible structures returned by AWS SDK interface ContentBlockStartEvent { @@ -433,6 +443,17 @@ export class AwsBedrockHandler extends BaseProvider implements SingleCompletionH additionalModelRequestFields.anthropic_beta = anthropicBetas } + // Determine if service tier should be applied (checked later when building payload) + const useServiceTier = + this.options.awsBedrockServiceTier && BEDROCK_SERVICE_TIER_MODEL_IDS.includes(baseModelId as any) + if (useServiceTier) { + logger.info("Service tier specified for Bedrock request", { + ctx: "bedrock", + modelId: modelConfig.id, + serviceTier: this.options.awsBedrockServiceTier, + }) + } + // Build tool configuration if native tools are enabled let toolConfig: ToolConfiguration | undefined if (useNativeTools && metadata?.tools) { @@ -442,7 +463,10 @@ export class AwsBedrockHandler extends BaseProvider implements SingleCompletionH } } - const payload: BedrockPayload = { + // Build payload with optional service_tier at top level + // Service tier is a top-level parameter per AWS documentation, NOT inside additionalModelRequestFields + // https://docs.aws.amazon.com/bedrock/latest/userguide/service-tiers-inference.html + const payload: BedrockPayloadWithServiceTier = { modelId: modelConfig.id, messages: formatted.messages, system: formatted.system, @@ -451,6 +475,8 @@ export class AwsBedrockHandler extends BaseProvider implements SingleCompletionH // Add anthropic_version at top level when using thinking features ...(thinkingEnabled && { anthropic_version: "bedrock-2023-05-31" }), ...(toolConfig && { toolConfig }), + // Add service_tier as a top-level parameter (not inside additionalModelRequestFields) + ...(useServiceTier && { service_tier: this.options.awsBedrockServiceTier }), } // Create AbortController with 10 minute timeout @@ -1089,6 +1115,30 @@ export class AwsBedrockHandler extends BaseProvider implements SingleCompletionH defaultTemperature: BEDROCK_DEFAULT_TEMPERATURE, }) + // Apply service tier pricing if specified and model supports it + const baseModelIdForTier = this.parseBaseModelId(modelConfig.id) + if (this.options.awsBedrockServiceTier && BEDROCK_SERVICE_TIER_MODEL_IDS.includes(baseModelIdForTier as any)) { + const pricingMultiplier = BEDROCK_SERVICE_TIER_PRICING[this.options.awsBedrockServiceTier] + if (pricingMultiplier && pricingMultiplier !== 1.0) { + // Apply pricing multiplier to all price fields + modelConfig.info = { + ...modelConfig.info, + inputPrice: modelConfig.info.inputPrice + ? modelConfig.info.inputPrice * pricingMultiplier + : undefined, + outputPrice: modelConfig.info.outputPrice + ? modelConfig.info.outputPrice * pricingMultiplier + : undefined, + cacheWritesPrice: modelConfig.info.cacheWritesPrice + ? modelConfig.info.cacheWritesPrice * pricingMultiplier + : undefined, + cacheReadsPrice: modelConfig.info.cacheReadsPrice + ? modelConfig.info.cacheReadsPrice * pricingMultiplier + : undefined, + } + } + } + // Don't override maxTokens/contextWindow here; handled in getModelById (and includes user overrides) return { ...modelConfig, ...params } as { id: BedrockModelId | string diff --git a/webview-ui/src/components/settings/providers/Bedrock.tsx b/webview-ui/src/components/settings/providers/Bedrock.tsx index fac75170e96..75c4ea9a176 100644 --- a/webview-ui/src/components/settings/providers/Bedrock.tsx +++ b/webview-ui/src/components/settings/providers/Bedrock.tsx @@ -5,9 +5,11 @@ import { VSCodeTextField } from "@vscode/webview-ui-toolkit/react" import { type ProviderSettings, type ModelInfo, + type BedrockServiceTier, BEDROCK_REGIONS, BEDROCK_1M_CONTEXT_MODEL_IDS, BEDROCK_GLOBAL_INFERENCE_MODEL_IDS, + BEDROCK_SERVICE_TIER_MODEL_IDS, } from "@roo-code/types" import { useAppTranslation } from "@src/i18n/TranslationContext" @@ -35,6 +37,10 @@ export const Bedrock = ({ apiConfiguration, setApiConfigurationField, selectedMo !!apiConfiguration?.apiModelId && BEDROCK_GLOBAL_INFERENCE_MODEL_IDS.includes(apiConfiguration.apiModelId as any) + // Check if the selected model supports service tiers + const supportsServiceTiers = + !!apiConfiguration?.apiModelId && BEDROCK_SERVICE_TIER_MODEL_IDS.includes(apiConfiguration.apiModelId as any) + // Update the endpoint enabled state when the configuration changes useEffect(() => { setAwsEndpointSelected(!!apiConfiguration?.awsBedrockEndpointEnabled) @@ -150,6 +156,49 @@ export const Bedrock = ({ apiConfiguration, setApiConfigurationField, selectedMo + {supportsServiceTiers && ( +
+ + +
+ {t("settings:providers.awsServiceTierNote")} +
+
+ )} {supportsGlobalInference && (