10 changes: 9 additions & 1 deletion packages/types/src/model.ts
@@ -4,7 +4,7 @@ import { z } from "zod"
* ReasoningEffort
*/

export const reasoningEfforts = ["low", "medium", "high"] as const
export const reasoningEfforts = ["minimal", "low", "medium", "high"] as const

export const reasoningEffortsSchema = z.enum(reasoningEfforts)

@@ -44,11 +44,19 @@ export const modelInfoSchema = z.object({
supportsImages: z.boolean().optional(),
supportsComputerUse: z.boolean().optional(),
supportsPromptCache: z.boolean(),
// Whether this model supports temperature. Some Responses models (e.g. o-series) do not.
supportsTemperature: z.boolean().optional(),
// Capability flag to indicate whether the model supports an output verbosity parameter
supportsVerbosity: z.boolean().optional(),
supportsReasoningBudget: z.boolean().optional(),
requiredReasoningBudget: z.boolean().optional(),
supportsReasoningEffort: z.boolean().optional(),
// Whether this model supports Responses API reasoning summaries
supportsReasoningSummary: z.boolean().optional(),
// The role to use for the system prompt ('system' or 'developer')
systemPromptRole: z.enum(["system", "developer"]).optional(),
// The default temperature for the model
defaultTemperature: z.number().optional(),
supportedParameters: z.array(modelParametersSchema).optional(),
inputPrice: z.number().optional(),
outputPrice: z.number().optional(),
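For context, a handler might consume the new capability fields roughly like this (an illustrative sketch, not code from this PR; resolveTemperature and requested are invented names):

// Illustrative sketch only. Models that reject the temperature parameter
// (supportsTemperature === false, e.g. the o-series) get no temperature at
// all; otherwise a caller-supplied value falls back to the model default.
function resolveTemperature(info: ModelInfo, requested?: number): number | undefined {
	if (info.supportsTemperature === false) return undefined
	return requested ?? info.defaultTemperature
}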
24 changes: 24 additions & 0 deletions packages/types/src/providers/__tests__/openai.models.spec.ts
@@ -0,0 +1,24 @@
import { describe, it, expect } from "vitest"
import { openAiNativeModels } from "../openai.js"
import type { ModelInfo } from "../../model.js"

describe("openAiNativeModels temperature invariants", () => {
it("models with supportsTemperature === false must not specify defaultTemperature", () => {
for (const [_id, info] of Object.entries(openAiNativeModels)) {
const modelInfo = info as ModelInfo & { supportsTemperature?: boolean; defaultTemperature?: number }
if (modelInfo.supportsTemperature === false) {
expect(modelInfo.defaultTemperature).toBeUndefined()
}
}
})

it("gpt-5 family models must have supportsTemperature: false and no defaultTemperature", () => {
const gpt5Ids = ["gpt-5-2025-08-07", "gpt-5-mini-2025-08-07", "gpt-5-nano-2025-08-07"]
for (const id of gpt5Ids) {
const info = openAiNativeModels[id as keyof typeof openAiNativeModels] as ModelInfo & { supportsTemperature?: boolean; defaultTemperature?: number }
expect(info).toBeDefined()
expect(info.supportsTemperature).toBe(false)
expect(info.defaultTemperature).toBeUndefined()
}
})
})
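(These invariants should run under the package's Vitest setup, e.g. npx vitest run packages/types/src/providers/__tests__/openai.models.spec.ts, though the exact invocation depends on the workspace configuration.)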
163 changes: 90 additions & 73 deletions packages/types/src/providers/openai.ts
@@ -3,7 +3,7 @@ import type { ModelInfo } from "../model.js"
// https://openai.com/api/pricing/
export type OpenAiNativeModelId = keyof typeof openAiNativeModels

-export const openAiNativeDefaultModelId: OpenAiNativeModelId = "gpt-5-2025-08-07"
+export const openAiNativeDefaultModelId: OpenAiNativeModelId = "gpt-5"

export const openAiNativeModels = {
"gpt-5-chat-latest": {
@@ -19,6 +19,24 @@ export const openAiNativeModels = {
supportsVerbosity: true,
},
"gpt-5-2025-08-07": {
maxTokens: 128000,
contextWindow: 400000,
supportsImages: true,
supportsPromptCache: true,
supportsReasoningEffort: true,
reasoningEffort: "medium",
inputPrice: 1.25,
outputPrice: 10.0,
cacheReadsPrice: 0.13,
description: "GPT-5 (2025-08-07): Latest snapshot of GPT-5 model",
// supportsVerbosity is a new capability; ensure ModelInfo includes it
supportsVerbosity: true,
// GPT-5 supports Responses API reasoning summaries
supportsReasoningSummary: true,
systemPromptRole: "developer",
supportsTemperature: false,
},
"gpt-5": {
Contributor

Critical Issue: There are duplicate model entries here - both versioned (e.g., "gpt-5-2025-08-07") and unversioned (e.g., "gpt-5") variants with identical configurations. This creates redundancy and potential confusion.

Consider using a single source of truth with aliases, or document why both variants are necessary.
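For example, something along these lines (a rough, untested sketch):

// Define the snapshot once, then point the unversioned id at the same object
// so the two entries can never drift apart.
const gpt5Snapshot: ModelInfo = {
	maxTokens: 128000,
	contextWindow: 400000,
	supportsImages: true,
	supportsPromptCache: true,
	inputPrice: 1.25,
	outputPrice: 10.0,
	// ...remaining fields exactly as in "gpt-5-2025-08-07" above
}

export const openAiNativeModels = {
	"gpt-5-2025-08-07": gpt5Snapshot,
	"gpt-5": gpt5Snapshot, // unversioned alias, single source of truth
	// ...other entries unchanged
}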

maxTokens: 128000,
contextWindow: 400000,
supportsImages: true,
@@ -31,8 +49,29 @@ export const openAiNativeModels = {
description: "GPT-5: The best model for coding and agentic tasks across domains",
// supportsVerbosity is a new capability; ensure ModelInfo includes it
supportsVerbosity: true,
// GPT-5 supports Responses API reasoning summaries
supportsReasoningSummary: true,
systemPromptRole: "developer",
supportsTemperature: false,
},
"gpt-5-mini-2025-08-07": {
maxTokens: 128000,
contextWindow: 400000,
supportsImages: true,
supportsPromptCache: true,
supportsReasoningEffort: true,
reasoningEffort: "medium",
inputPrice: 0.25,
outputPrice: 2.0,
cacheReadsPrice: 0.03,
description: "GPT-5 Mini (2025-08-07): Latest snapshot of GPT-5 Mini model",
supportsVerbosity: true,
// GPT-5 supports Responses API reasoning summaries
supportsReasoningSummary: true,
systemPromptRole: "developer",
supportsTemperature: false,
},
"gpt-5-mini": {
maxTokens: 128000,
contextWindow: 400000,
supportsImages: true,
@@ -44,8 +83,29 @@ export const openAiNativeModels = {
cacheReadsPrice: 0.03,
description: "GPT-5 Mini: A faster, more cost-efficient version of GPT-5 for well-defined tasks",
supportsVerbosity: true,
// GPT-5 supports Responses API reasoning summaries
supportsReasoningSummary: true,
systemPromptRole: "developer",
supportsTemperature: false,
},
"gpt-5-nano-2025-08-07": {
maxTokens: 128000,
contextWindow: 400000,
supportsImages: true,
supportsPromptCache: true,
supportsReasoningEffort: true,
reasoningEffort: "medium",
inputPrice: 0.05,
outputPrice: 0.4,
cacheReadsPrice: 0.01,
description: "GPT-5 Nano (2025-08-07): Latest snapshot of GPT-5 Nano model",
supportsVerbosity: true,
// GPT-5 supports Responses API reasoning summaries
supportsReasoningSummary: true,
systemPromptRole: "developer",
supportsTemperature: false,
},
"gpt-5-nano": {
maxTokens: 128000,
contextWindow: 400000,
supportsImages: true,
@@ -57,6 +117,10 @@ export const openAiNativeModels = {
cacheReadsPrice: 0.01,
description: "GPT-5 Nano: Fastest, most cost-efficient version of GPT-5",
supportsVerbosity: true,
// GPT-5 supports Responses API reasoning summaries
supportsReasoningSummary: true,
systemPromptRole: "developer",
supportsTemperature: false,
},
"gpt-4.1": {
maxTokens: 32_768,
@@ -66,6 +130,9 @@ export const openAiNativeModels = {
inputPrice: 2,
outputPrice: 8,
cacheReadsPrice: 0.5,
systemPromptRole: "system",
defaultTemperature: 0,
supportsTemperature: true,
},
"gpt-4.1-mini": {
maxTokens: 32_768,
@@ -75,6 +142,9 @@ export const openAiNativeModels = {
inputPrice: 0.4,
outputPrice: 1.6,
cacheReadsPrice: 0.1,
systemPromptRole: "system",
defaultTemperature: 0,
supportsTemperature: true,
},
"gpt-4.1-nano": {
maxTokens: 32_768,
@@ -84,6 +154,9 @@ export const openAiNativeModels = {
inputPrice: 0.1,
outputPrice: 0.4,
cacheReadsPrice: 0.025,
systemPromptRole: "system",
defaultTemperature: 0,
supportsTemperature: true,
},
o3: {
maxTokens: 100_000,
@@ -95,26 +168,8 @@ export const openAiNativeModels = {
cacheReadsPrice: 0.5,
supportsReasoningEffort: true,
reasoningEffort: "medium",
-},
-"o3-high": {
-maxTokens: 100_000,
-contextWindow: 200_000,
-supportsImages: true,
-supportsPromptCache: true,
-inputPrice: 2.0,
-outputPrice: 8.0,
-cacheReadsPrice: 0.5,
-reasoningEffort: "high",
-},
-"o3-low": {
-maxTokens: 100_000,
-contextWindow: 200_000,
-supportsImages: true,
-supportsPromptCache: true,
-inputPrice: 2.0,
-outputPrice: 8.0,
-cacheReadsPrice: 0.5,
-reasoningEffort: "low",
+systemPromptRole: "developer",
+supportsTemperature: false,
},
"o4-mini": {
maxTokens: 100_000,
@@ -126,26 +181,8 @@ export const openAiNativeModels = {
cacheReadsPrice: 0.275,
supportsReasoningEffort: true,
reasoningEffort: "medium",
-},
-"o4-mini-high": {
-maxTokens: 100_000,
-contextWindow: 200_000,
-supportsImages: true,
-supportsPromptCache: true,
-inputPrice: 1.1,
-outputPrice: 4.4,
-cacheReadsPrice: 0.275,
-reasoningEffort: "high",
-},
-"o4-mini-low": {
-maxTokens: 100_000,
-contextWindow: 200_000,
-supportsImages: true,
-supportsPromptCache: true,
-inputPrice: 1.1,
-outputPrice: 4.4,
-cacheReadsPrice: 0.275,
-reasoningEffort: "low",
+systemPromptRole: "developer",
+supportsTemperature: false,
},
"o3-mini": {
maxTokens: 100_000,
@@ -157,26 +194,8 @@ export const openAiNativeModels = {
cacheReadsPrice: 0.55,
supportsReasoningEffort: true,
reasoningEffort: "medium",
-},
-"o3-mini-high": {
-maxTokens: 100_000,
-contextWindow: 200_000,
-supportsImages: false,
-supportsPromptCache: true,
-inputPrice: 1.1,
-outputPrice: 4.4,
-cacheReadsPrice: 0.55,
-reasoningEffort: "high",
-},
-"o3-mini-low": {
-maxTokens: 100_000,
-contextWindow: 200_000,
-supportsImages: false,
-supportsPromptCache: true,
-inputPrice: 1.1,
-outputPrice: 4.4,
-cacheReadsPrice: 0.55,
-reasoningEffort: "low",
+systemPromptRole: "developer",
+supportsTemperature: false,
},
o1: {
maxTokens: 100_000,
@@ -186,15 +205,8 @@ export const openAiNativeModels = {
inputPrice: 15,
outputPrice: 60,
cacheReadsPrice: 7.5,
-},
-"o1-preview": {
-maxTokens: 32_768,
-contextWindow: 128_000,
-supportsImages: true,
-supportsPromptCache: true,
-inputPrice: 15,
-outputPrice: 60,
-cacheReadsPrice: 7.5,
+systemPromptRole: "developer",
+supportsTemperature: false,
},
"o1-mini": {
maxTokens: 65_536,
@@ -204,6 +216,8 @@ export const openAiNativeModels = {
inputPrice: 1.1,
outputPrice: 4.4,
cacheReadsPrice: 0.55,
systemPromptRole: "developer",
supportsTemperature: false,
},
"gpt-4o": {
maxTokens: 16_384,
@@ -213,6 +227,9 @@ export const openAiNativeModels = {
inputPrice: 2.5,
outputPrice: 10,
cacheReadsPrice: 1.25,
systemPromptRole: "system",
defaultTemperature: 0,
supportsTemperature: true,
},
"gpt-4o-mini": {
maxTokens: 16_384,
@@ -222,6 +239,8 @@ export const openAiNativeModels = {
inputPrice: 0.15,
outputPrice: 0.6,
cacheReadsPrice: 0.075,
systemPromptRole: "system",
defaultTemperature: 0,
},
"codex-mini-latest": {
maxTokens: 16_384,
@@ -243,13 +262,11 @@ export const openAiModelInfoSaneDefaults: ModelInfo = {
supportsPromptCache: false,
inputPrice: 0,
outputPrice: 0,
defaultTemperature: 0,
}

// https://learn.microsoft.com/en-us/azure/ai-services/openai/api-version-deprecation
// https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#api-specs
export const azureOpenAiDefaultApiVersion = "2024-08-01-preview"

export const OPENAI_NATIVE_DEFAULT_TEMPERATURE = 0
export const GPT5_DEFAULT_TEMPERATURE = 1.0

export const OPENAI_AZURE_AI_INFERENCE_PATH = "/models/chat/completions"
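The diff does not show where the two temperature constants are applied; one plausible reading (an assumption, not confirmed by this PR, and modelId is an invented name) is that GPT-5-family models which still accept a temperature default to 1.0 while other native models default to 0:

// Assumed selection logic, not shown in this diff.
const fallbackTemperature = modelId.startsWith("gpt-5")
	? GPT5_DEFAULT_TEMPERATURE
	: OPENAI_NATIVE_DEFAULT_TEMPERATURE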
14 changes: 14 additions & 0 deletions src/api/index.ts
@@ -52,6 +52,20 @@ export interface ApiHandlerCreateMessageMetadata {
* Used to enforce "skip once" after a condense operation.
*/
suppressPreviousResponseId?: boolean

/**
* Force this call to operate statelessly (providers should set store=false and
* suppress any previous_response_id). Intended for the first call after local
* context rewriting (condense or sliding-window).
*/
forceStateless?: boolean

/**
* Optional stable cache key for OpenAI Responses API caching.
* When provided, providers that support it should pass it as prompt_cache_key.
* Per-call metadata takes precedence over handler options.
*/
promptCacheKey?: string
}

export interface ApiHandler {
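Taken together with suppressPreviousResponseId, the new fields suggest request shaping along these lines (a sketch under assumptions; buildResponsesRequest and lastResponseId are invented names, not part of this PR):

// Sketch only: how a Responses API provider might honor the new metadata.
function buildResponsesRequest(metadata: ApiHandlerCreateMessageMetadata, lastResponseId?: string) {
	const stateless = metadata.forceStateless === true
	return {
		// forceStateless means the provider should not persist server-side state
		store: !stateless,
		previous_response_id:
			stateless || metadata.suppressPreviousResponseId ? undefined : lastResponseId,
		// per-call metadata takes precedence over handler options
		prompt_cache_key: metadata.promptCacheKey,
	}
}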