packages/types/src/model.ts: 10 changes (9 additions, 1 deletion)
@@ -4,7 +4,7 @@ import { z } from "zod"
* ReasoningEffort
*/

export const reasoningEfforts = ["low", "medium", "high"] as const
export const reasoningEfforts = ["minimal", "low", "medium", "high"] as const

export const reasoningEffortsSchema = z.enum(reasoningEfforts)

@@ -44,11 +44,19 @@ export const modelInfoSchema = z.object({
supportsImages: z.boolean().optional(),
supportsComputerUse: z.boolean().optional(),
supportsPromptCache: z.boolean(),
// Whether this model supports temperature. Some Responses models (e.g. o-series) do not.
supportsTemperature: z.boolean().optional(),
// Capability flag to indicate whether the model supports an output verbosity parameter
supportsVerbosity: z.boolean().optional(),
supportsReasoningBudget: z.boolean().optional(),
requiredReasoningBudget: z.boolean().optional(),
supportsReasoningEffort: z.boolean().optional(),
// Whether this model supports Responses API reasoning summaries
supportsReasoningSummary: z.boolean().optional(),
// The role to use for the system prompt ('system' or 'developer')
systemPromptRole: z.enum(["system", "developer"]).optional(),
// The default temperature for the model
defaultTemperature: z.number().optional(),
supportedParameters: z.array(modelParametersSchema).optional(),
inputPrice: z.number().optional(),
outputPrice: z.number().optional(),
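
Since every new flag on modelInfoSchema is optional, consumers have to distinguish an explicit false from an absent flag. A minimal sketch of how a request builder might read the temperature-related fields, assuming the ModelInfo type inferred from this schema (the helper itself is illustrative, not part of the PR):

import type { ModelInfo } from "./model.js" // same import path the provider files use

// Omit temperature only when the model explicitly opts out
// (supportsTemperature: false, e.g. o-series and GPT-5); an absent flag
// means the capability is unknown, so the caller's value still applies.
function temperatureParams(info: ModelInfo, requested?: number): { temperature?: number } {
	if (info.supportsTemperature === false) {
		return {}
	}
	const temperature = requested ?? info.defaultTemperature
	return temperature === undefined ? {} : { temperature }
}
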
packages/types/src/providers/__tests__/openai.models.spec.ts: 33 changes (33 additions, 0 deletions)
@@ -0,0 +1,33 @@
import { describe, it, expect } from "vitest"
import { openAiNativeModels } from "../openai.js"

type Dict = Record<string, unknown>
Review comment (Contributor): Is this test file change intentional for an i18n cleanup PR? While the improvements to type safety are good, they seem unrelated to removing unused translation keys. Consider moving these changes to a separate PR for clarity.

const hasProp = (obj: Dict, key: string) => Object.prototype.hasOwnProperty.call(obj, key)
const boolProp = (obj: Dict, key: string): boolean | undefined => {
const v = obj[key]
return typeof v === "boolean" ? (v as boolean) : undefined
}

describe("openAiNativeModels temperature invariants", () => {
it("models with supportsTemperature === false must not specify defaultTemperature", () => {
const values = Object.values(openAiNativeModels) as Dict[]
for (const info of values) {
const supportsTemp = boolProp(info, "supportsTemperature")
if (supportsTemp === false) {
expect(hasProp(info, "defaultTemperature")).toBe(false)
}
}
})

it("gpt-5 family models must have supportsTemperature: false and no defaultTemperature", () => {
const gpt5Ids = ["gpt-5-2025-08-07", "gpt-5-mini-2025-08-07", "gpt-5-nano-2025-08-07"] as const
for (const id of gpt5Ids) {
// The non-null assertion is safe here because the IDs are known keys in openAiNativeModels
const infoUnknown = (openAiNativeModels as Record<string, unknown>)[id]!
const info = infoUnknown as Dict
expect(info).toBeDefined()
expect(boolProp(info, "supportsTemperature")).toBe(false)
expect(hasProp(info, "defaultTemperature")).toBe(false)
}
})
})
packages/types/src/providers/openai.ts: 172 changes (90 additions, 82 deletions)
@@ -3,7 +3,7 @@ import type { ModelInfo } from "../model.js"
// https://openai.com/api/pricing/
export type OpenAiNativeModelId = keyof typeof openAiNativeModels

export const openAiNativeDefaultModelId: OpenAiNativeModelId = "gpt-5-2025-08-07"
export const openAiNativeDefaultModelId: OpenAiNativeModelId = "gpt-5"

export const openAiNativeModels = {
"gpt-5-2025-08-07": {
@@ -19,6 +19,28 @@
description: "GPT-5: The best model for coding and agentic tasks across domains",
// supportsVerbosity is a new capability; ensure ModelInfo includes it
supportsVerbosity: true,
// GPT-5 supports Responses API reasoning summaries
supportsReasoningSummary: true,
systemPromptRole: "developer",
supportsTemperature: false,
},
"gpt-5": {
maxTokens: 128000,
contextWindow: 400000,
supportsImages: true,
supportsPromptCache: true,
supportsReasoningEffort: true,
reasoningEffort: "medium",
inputPrice: 1.25,
outputPrice: 10.0,
cacheReadsPrice: 0.13,
description: "GPT-5: The best model for coding and agentic tasks across domains",
// supportsVerbosity is a new capability; ensure ModelInfo includes it
supportsVerbosity: true,
// GPT-5 supports Responses API reasoning summaries
supportsReasoningSummary: true,
systemPromptRole: "developer",
supportsTemperature: false,
},
"gpt-5-mini-2025-08-07": {
maxTokens: 128000,
@@ -32,6 +54,27 @@
cacheReadsPrice: 0.03,
description: "GPT-5 Mini: A faster, more cost-efficient version of GPT-5 for well-defined tasks",
supportsVerbosity: true,
// GPT-5 supports Responses API reasoning summaries
supportsReasoningSummary: true,
systemPromptRole: "developer",
supportsTemperature: false,
},
"gpt-5-mini": {
maxTokens: 128000,
contextWindow: 400000,
supportsImages: true,
supportsPromptCache: true,
supportsReasoningEffort: true,
reasoningEffort: "medium",
inputPrice: 0.25,
outputPrice: 2.0,
cacheReadsPrice: 0.03,
description: "GPT-5 Mini: A faster, more cost-efficient version of GPT-5 for well-defined tasks",
supportsVerbosity: true,
// GPT-5 supports Responses API reasoning summaries
supportsReasoningSummary: true,
systemPromptRole: "developer",
supportsTemperature: false,
},
"gpt-5-nano-2025-08-07": {
maxTokens: 128000,
@@ -45,6 +88,27 @@
cacheReadsPrice: 0.01,
description: "GPT-5 Nano: Fastest, most cost-efficient version of GPT-5",
supportsVerbosity: true,
// GPT-5 supports Responses API reasoning summaries
supportsReasoningSummary: true,
systemPromptRole: "developer",
supportsTemperature: false,
},
"gpt-5-nano": {
maxTokens: 128000,
contextWindow: 400000,
supportsImages: true,
supportsPromptCache: true,
supportsReasoningEffort: true,
reasoningEffort: "medium",
inputPrice: 0.05,
outputPrice: 0.4,
cacheReadsPrice: 0.01,
description: "GPT-5 Nano: Fastest, most cost-efficient version of GPT-5",
supportsVerbosity: true,
// GPT-5 supports Responses API reasoning summaries
supportsReasoningSummary: true,
systemPromptRole: "developer",
supportsTemperature: false,
},
"gpt-4.1": {
maxTokens: 32_768,
@@ -54,6 +118,9 @@
inputPrice: 2,
outputPrice: 8,
cacheReadsPrice: 0.5,
systemPromptRole: "system",
defaultTemperature: 0,
supportsTemperature: true,
},
"gpt-4.1-mini": {
maxTokens: 32_768,
@@ -63,6 +130,9 @@
inputPrice: 0.4,
outputPrice: 1.6,
cacheReadsPrice: 0.1,
systemPromptRole: "system",
defaultTemperature: 0,
supportsTemperature: true,
},
"gpt-4.1-nano": {
maxTokens: 32_768,
@@ -72,6 +142,9 @@
inputPrice: 0.1,
outputPrice: 0.4,
cacheReadsPrice: 0.025,
systemPromptRole: "system",
defaultTemperature: 0,
supportsTemperature: true,
},
o3: {
maxTokens: 100_000,
@@ -83,26 +156,8 @@
cacheReadsPrice: 0.5,
supportsReasoningEffort: true,
reasoningEffort: "medium",
},
"o3-high": {
maxTokens: 100_000,
contextWindow: 200_000,
supportsImages: true,
supportsPromptCache: true,
inputPrice: 2.0,
outputPrice: 8.0,
cacheReadsPrice: 0.5,
reasoningEffort: "high",
},
"o3-low": {
maxTokens: 100_000,
contextWindow: 200_000,
supportsImages: true,
supportsPromptCache: true,
inputPrice: 2.0,
outputPrice: 8.0,
cacheReadsPrice: 0.5,
reasoningEffort: "low",
systemPromptRole: "developer",
supportsTemperature: false,
},
"o4-mini": {
maxTokens: 100_000,
@@ -114,26 +169,8 @@
cacheReadsPrice: 0.275,
supportsReasoningEffort: true,
reasoningEffort: "medium",
},
"o4-mini-high": {
maxTokens: 100_000,
contextWindow: 200_000,
supportsImages: true,
supportsPromptCache: true,
inputPrice: 1.1,
outputPrice: 4.4,
cacheReadsPrice: 0.275,
reasoningEffort: "high",
},
"o4-mini-low": {
maxTokens: 100_000,
contextWindow: 200_000,
supportsImages: true,
supportsPromptCache: true,
inputPrice: 1.1,
outputPrice: 4.4,
cacheReadsPrice: 0.275,
reasoningEffort: "low",
systemPromptRole: "developer",
supportsTemperature: false,
},
"o3-mini": {
maxTokens: 100_000,
@@ -145,26 +182,8 @@
cacheReadsPrice: 0.55,
supportsReasoningEffort: true,
reasoningEffort: "medium",
},
"o3-mini-high": {
maxTokens: 100_000,
contextWindow: 200_000,
supportsImages: false,
supportsPromptCache: true,
inputPrice: 1.1,
outputPrice: 4.4,
cacheReadsPrice: 0.55,
reasoningEffort: "high",
},
"o3-mini-low": {
maxTokens: 100_000,
contextWindow: 200_000,
supportsImages: false,
supportsPromptCache: true,
inputPrice: 1.1,
outputPrice: 4.4,
cacheReadsPrice: 0.55,
reasoningEffort: "low",
systemPromptRole: "developer",
supportsTemperature: false,
},
o1: {
maxTokens: 100_000,
@@ -174,15 +193,8 @@
inputPrice: 15,
outputPrice: 60,
cacheReadsPrice: 7.5,
},
"o1-preview": {
maxTokens: 32_768,
contextWindow: 128_000,
supportsImages: true,
supportsPromptCache: true,
inputPrice: 15,
outputPrice: 60,
cacheReadsPrice: 7.5,
systemPromptRole: "developer",
supportsTemperature: false,
},
"o1-mini": {
maxTokens: 65_536,
@@ -192,15 +204,8 @@
inputPrice: 1.1,
outputPrice: 4.4,
cacheReadsPrice: 0.55,
},
"gpt-4.5-preview": {
maxTokens: 16_384,
contextWindow: 128_000,
supportsImages: true,
supportsPromptCache: true,
inputPrice: 75,
outputPrice: 150,
cacheReadsPrice: 37.5,
systemPromptRole: "developer",
supportsTemperature: false,
},
"gpt-4o": {
maxTokens: 16_384,
@@ -210,6 +215,9 @@
inputPrice: 2.5,
outputPrice: 10,
cacheReadsPrice: 1.25,
systemPromptRole: "system",
defaultTemperature: 0,
supportsTemperature: true,
},
"gpt-4o-mini": {
maxTokens: 16_384,
@@ -219,6 +227,8 @@
inputPrice: 0.15,
outputPrice: 0.6,
cacheReadsPrice: 0.075,
systemPromptRole: "system",
defaultTemperature: 0,
},
} as const satisfies Record<string, ModelInfo>

Expand All @@ -229,13 +239,11 @@ export const openAiModelInfoSaneDefaults: ModelInfo = {
supportsPromptCache: false,
inputPrice: 0,
outputPrice: 0,
defaultTemperature: 0,
}

// https://learn.microsoft.com/en-us/azure/ai-services/openai/api-version-deprecation
// https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#api-specs
export const azureOpenAiDefaultApiVersion = "2024-08-01-preview"

export const OPENAI_NATIVE_DEFAULT_TEMPERATURE = 0
export const GPT5_DEFAULT_TEMPERATURE = 1.0

export const OPENAI_AZURE_AI_INFERENCE_PATH = "/models/chat/completions"
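
The dated and undated GPT-5 IDs share identical entries, and the removed -high/-low pseudo-models are now expressed through the base entries' reasoningEffort default. A minimal sketch of how a handler might consume the new per-model fields (the helper and example calls are illustrative, not part of the PR):

import { openAiNativeModels, type OpenAiNativeModelId } from "./openai.js"
import type { ModelInfo } from "../model.js"

// Resolve the role for the system prompt: reasoning models (o-series,
// GPT-5) declare "developer", classic chat models declare "system";
// fall back to "system" when the field is unset.
function systemMessageFor(id: OpenAiNativeModelId, prompt: string) {
	const info: ModelInfo = openAiNativeModels[id]
	return { role: info.systemPromptRole ?? "system", content: prompt }
}

systemMessageFor("gpt-5", "You are a coding assistant.") // { role: "developer", ... }
systemMessageFor("gpt-4o", "You are a coding assistant.") // { role: "system", ... }
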
src/api/index.ts: 14 changes (14 additions, 0 deletions)
@@ -51,6 +51,20 @@ export interface ApiHandlerCreateMessageMetadata {
* Used to enforce "skip once" after a condense operation.
*/
suppressPreviousResponseId?: boolean

/**
* Force this call to operate statelessly (providers should set store=false and
* suppress any previous_response_id). Intended for the first call after local
* context rewriting (condense or sliding-window).
*/
forceStateless?: boolean

/**
* Optional stable cache key for OpenAI Responses API caching.
* When provided, providers that support it should pass it as prompt_cache_key.
* Per-call metadata takes precedence over handler options.
*/
promptCacheKey?: string
}

export interface ApiHandler {
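
Taken together with the existing suppressPreviousResponseId, the two new fields let a Responses API provider decide per call whether to chain onto server-side state and which cache key to send. A minimal sketch of how a provider might apply them (the request-body shape and helper are illustrative, not part of the PR):

import type { ApiHandlerCreateMessageMetadata } from "./index"

// Illustrative subset of an OpenAI Responses API request body.
interface ResponsesRequestBody {
	store?: boolean
	previous_response_id?: string
	prompt_cache_key?: string
}

function applyMetadata(
	body: ResponsesRequestBody,
	metadata: ApiHandlerCreateMessageMetadata | undefined,
	lastResponseId: string | undefined,
	handlerCacheKey: string | undefined,
): ResponsesRequestBody {
	const result = { ...body }

	if (metadata?.forceStateless) {
		// First call after condense/sliding-window: do not chain onto
		// server-side state and do not persist this turn.
		result.store = false
		delete result.previous_response_id
	} else if (!metadata?.suppressPreviousResponseId && lastResponseId) {
		result.previous_response_id = lastResponseId
	}

	// Per-call metadata takes precedence over handler-level options.
	const cacheKey = metadata?.promptCacheKey ?? handlerCacheKey
	if (cacheKey) {
		result.prompt_cache_key = cacheKey
	}

	return result
}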