From a92fb7ce0b08a767d579205240674f2c50648674 Mon Sep 17 00:00:00 2001 From: daniel-lxs Date: Thu, 18 Dec 2025 18:05:53 -0500 Subject: [PATCH] fix: strip unsupported JSON Schema format values for OpenAI compatibility When using OpenAI Native provider with MCP servers (like fetch), the API rejects schemas containing format values not supported by Structured Outputs. OpenAI only supports: date-time, time, date, duration, email, hostname, ipv4, ipv6, uuid This change updates normalizeToolSchema() to strip unsupported format values (like 'uri', 'uri-reference') while preserving supported ones. Fixes PostHog issue: 019b3341-35fb-76b1-8aba-ef36d698f9a2 --- src/utils/__tests__/json-schema.spec.ts | 179 ++++++++++++++++++++++++ src/utils/json-schema.ts | 33 ++++- 2 files changed, 209 insertions(+), 3 deletions(-) diff --git a/src/utils/__tests__/json-schema.spec.ts b/src/utils/__tests__/json-schema.spec.ts index 9e7eeb2e171..7b5c2e57b64 100644 --- a/src/utils/__tests__/json-schema.spec.ts +++ b/src/utils/__tests__/json-schema.spec.ts @@ -265,4 +265,183 @@ describe("normalizeToolSchema", () => { expect(props.line_ranges.items).toBeDefined() expect(props.line_ranges.description).toBe("Optional line ranges") }) + + describe("format field handling", () => { + it("should preserve supported format values (date-time)", () => { + const input = { + type: "string", + format: "date-time", + description: "Timestamp", + } + + const result = normalizeToolSchema(input) + + expect(result).toEqual({ + type: "string", + format: "date-time", + description: "Timestamp", + additionalProperties: false, + }) + }) + + it("should preserve supported format values (email)", () => { + const input = { + type: "string", + format: "email", + } + + const result = normalizeToolSchema(input) + + expect(result.format).toBe("email") + }) + + it("should preserve supported format values (uuid)", () => { + const input = { + type: "string", + format: "uuid", + } + + const result = normalizeToolSchema(input) + + 
expect(result.format).toBe("uuid") + }) + + it("should preserve all supported format values", () => { + const supportedFormats = [ + "date-time", + "time", + "date", + "duration", + "email", + "hostname", + "ipv4", + "ipv6", + "uuid", + ] + + for (const format of supportedFormats) { + const input = { type: "string", format } + const result = normalizeToolSchema(input) + expect(result.format).toBe(format) + } + }) + + it("should strip unsupported format value (uri)", () => { + const input = { + type: "string", + format: "uri", + description: "URL field", + } + + const result = normalizeToolSchema(input) + + expect(result).toEqual({ + type: "string", + description: "URL field", + additionalProperties: false, + }) + expect(result.format).toBeUndefined() + }) + + it("should strip unsupported format value (uri-reference)", () => { + const input = { + type: "string", + format: "uri-reference", + } + + const result = normalizeToolSchema(input) + + expect(result.format).toBeUndefined() + }) + + it("should strip unsupported format values (various)", () => { + const unsupportedFormats = ["uri", "uri-reference", "iri", "iri-reference", "regex", "json-pointer"] + + for (const format of unsupportedFormats) { + const input = { type: "string", format } + const result = normalizeToolSchema(input) + expect(result.format).toBeUndefined() + } + }) + + it("should strip unsupported format in nested properties", () => { + const input = { + type: "object", + properties: { + url: { + type: "string", + format: "uri", + description: "A URL", + }, + email: { + type: "string", + format: "email", + description: "An email", + }, + }, + } + + const result = normalizeToolSchema(input) + + const props = result.properties as Record<string, Record<string, unknown>> + expect(props.url.format).toBeUndefined() + expect(props.url.description).toBe("A URL") + expect(props.email.format).toBe("email") + expect(props.email.description).toBe("An email") + }) + + it("should strip unsupported format in deeply nested structures", () => { + 
const input = { + type: "object", + properties: { + items: { + type: "array", + items: { + type: "object", + properties: { + link: { + type: "string", + format: "uri", + }, + timestamp: { + type: "string", + format: "date-time", + }, + }, + }, + }, + }, + } + + const result = normalizeToolSchema(input) + + const props = result.properties as Record<string, Record<string, unknown>> + const itemsItems = props.items.items as Record<string, unknown> + const nestedProps = itemsItems.properties as Record<string, Record<string, unknown>> + expect(nestedProps.link.format).toBeUndefined() + expect(nestedProps.timestamp.format).toBe("date-time") + }) + + it("should handle MCP fetch server schema with uri format", () => { + // This is similar to the actual fetch MCP server schema that caused the error + const input = { + type: "object", + properties: { + url: { + type: "string", + format: "uri", + description: "URL to fetch", + }, + }, + required: ["url"], + } + + const result = normalizeToolSchema(input) + + const props = result.properties as Record<string, Record<string, unknown>> + expect(props.url.format).toBeUndefined() + expect(props.url.type).toBe("string") + expect(props.url.description).toBe("URL to fetch") + }) + }) }) diff --git a/src/utils/json-schema.ts b/src/utils/json-schema.ts index de34a8669b5..caba962d741 100644 --- a/src/utils/json-schema.ts +++ b/src/utils/json-schema.ts @@ -6,6 +6,23 @@ import { z } from "zod" */ export type JsonSchema = z4.core.JSONSchema.JSONSchema +/** + * Set of format values supported by OpenAI's Structured Outputs (strict mode). + * Unsupported format values will be stripped during schema normalization. 
+ * @see https://platform.openai.com/docs/guides/structured-outputs#supported-schemas + */ +const OPENAI_SUPPORTED_FORMATS = new Set([ + "date-time", + "time", + "date", + "duration", + "email", + "hostname", + "ipv4", + "ipv6", + "uuid", +]) + /** * Zod schema for JSON Schema primitive types */ @@ -76,10 +93,11 @@ const TypeFieldSchema = z.union([JsonSchemaTypeSchema, z.array(JsonSchemaTypeSch /** * Internal Zod schema that normalizes tool input JSON Schema to be compliant with JSON Schema draft 2020-12. * - * This schema performs two key transformations: + * This schema performs three key transformations: * 1. Sets `additionalProperties: false` by default (required by OpenAI strict mode) * 2. Converts deprecated `type: ["T", "null"]` array syntax to `anyOf` format * (required by Claude on Bedrock which enforces JSON Schema draft 2020-12) + * 3. Strips unsupported `format` values (e.g., "uri") for OpenAI Structured Outputs compatibility * * Uses recursive parsing so transformations apply to all nested schemas automatically. 
*/ @@ -109,10 +127,12 @@ const NormalizedToolSchemaInternal: z.ZodType<Record<string, unknown>, z.ZodType minItems: z.number().optional(), maxItems: z.number().optional(), uniqueItems: z.boolean().optional(), + // Format field - unsupported values will be stripped in transform + format: z.string().optional(), }) .passthrough() .transform((schema) => { - const { type, required, properties, ...rest } = schema + const { type, required, properties, format, ...rest } = schema const result: Record<string, unknown> = { ...rest } // If type is an array, convert to anyOf format (JSON Schema 2020-12) @@ -122,6 +142,12 @@ const NormalizedToolSchemaInternal: z.ZodType<Record<string, unknown>, z.ZodType result.type = type } + // Strip unsupported format values for OpenAI compatibility + // Only include format if it's a supported value + if (format && OPENAI_SUPPORTED_FORMATS.has(format)) { + result.format = format + } + // Handle properties and required for strict mode if (properties) { result.properties = properties @@ -145,10 +171,11 @@ const NormalizedToolSchemaInternal: z.ZodType<Record<string, unknown>, z.ZodType /** * Normalizes a tool input JSON Schema to be compliant with JSON Schema draft 2020-12. * - * This function performs two key transformations: + * This function performs three key transformations: * 1. Sets `additionalProperties: false` by default (required by OpenAI strict mode) * 2. Converts deprecated `type: ["T", "null"]` array syntax to `anyOf` format * (required by Claude on Bedrock which enforces JSON Schema draft 2020-12) + * 3. Strips unsupported `format` values (e.g., "uri") for OpenAI Structured Outputs compatibility * * Uses recursive parsing so transformations apply to all nested schemas automatically. *