From 42884898cbb60fad2902c2cad8494cfdb3705ec0 Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 23 Oct 2025 16:44:40 +0000 Subject: [PATCH 1/9] Add detailed response explaining TanStack DB schema types to PowerSync team MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This document clarifies the TInput/TOutput architecture and explains how PowerSync can support arbitrary type transformations (like Date objects) by handling serialization in their integration layer rather than constraining TOutput to match SQLite types. πŸ€– Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- powersync-response.md | 451 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 451 insertions(+) create mode 100644 powersync-response.md diff --git a/powersync-response.md b/powersync-response.md new file mode 100644 index 000000000..5d1d14815 --- /dev/null +++ b/powersync-response.md @@ -0,0 +1,451 @@ +# PowerSync Integration: Schema Types and Transformations + +Hi! Thanks for the detailed explanation and the POC. I think there's a misunderstanding about how `TInput`/`TOutput` work in TanStack DB schemas, and the good news is that you **can** support arbitrary TOutput transformations (like `Date` objects) with your PowerSync integration! + +## TL;DR + +**TOutput should always be the rich JavaScript types that users want to work with** (e.g., `Date` objects). Your PowerSync integration layer is responsible for: +- Deserializing SQLite types β†’ TOutput when syncing TO TanStack DB +- Serializing TOutput β†’ SQLite types when persisting mutations FROM TanStack DB + +You don't need to constrain TOutput to match SQLite types. + +--- + +## Understanding TInput and TOutput + +Let me clarify the data flow: + +### TInput: What Users Provide for Mutations + +When users call `collection.insert()` or `collection.update()`, they provide data in the **TInput** format. 
The schema validates and transforms this into **TOutput**. + +```typescript +// Example schema +const schema = z.object({ + id: z.string(), + created_at: z.string().transform(val => new Date(val)) +}) + +// TInput: { id: string, created_at: string } +// TOutput: { id: string, created_at: Date } + +// User inserts with TInput +collection.insert({ + id: "1", + created_at: "2023-01-01T00:00:00.000Z" // string +}) + +// Collection stores as TOutput +collection.get("1") +// Result: { id: "1", created_at: Date } // Date object +``` + +### TOutput: What Gets Stored and Read from Collection + +**All data in the collection is stored as TOutput.** This includes: +1. Data synced via `write()` +2. Data from user mutations (after validation) +3. Data in `PendingMutation.modified` + +Looking at the source code: + +```typescript +// packages/db/src/collection/sync.ts:93 +write: (message: Omit<ChangeMessage<TOutput>, 'key'>) => void +``` + +The `write()` function expects **TOutput**, not SQLite types. + +```typescript +// packages/db/src/collection/mutations.ts:179 +const mutation: PendingMutation<TOutput> = { + mutationId: crypto.randomUUID(), + original: {}, + modified: validatedData, // This is TOutput + // ... +} +``` + +**PendingMutations store TOutput**, which is the in-memory representation. + +--- + +## How PowerSync Integration Should Work + +Your integration has two responsibilities: + +### 1.
Syncing FROM SQLite TO TanStack DB + +When reading from SQLite and syncing to the collection, **deserialize to TOutput before calling write()**: + +```typescript +// Your PowerSync sync implementation +const sync: SyncConfig = { + sync: ({ write, begin, commit }) => { + // Read from SQLite + const sqliteRows = db.execute("SELECT * FROM documents") + + begin() + for (const row of sqliteRows) { + // SQLite gives you: { id: "1", created_at: "2023-01-01T00:00:00.000Z" } + + // Option A: If you have a schema, use validateData to transform + const transformed = collection.validateData(row, 'insert') + // Result: { id: "1", created_at: Date } + + write({ + type: 'insert', + value: transformed // TOutput with Date object + }) + + // Option B: If no schema, you need to manually transform + // const transformed = { + // ...row, + // created_at: new Date(row.created_at) + // } + // write({ type: 'insert', value: transformed }) + } + commit() + } +} +``` + +### 2. Persisting FROM TanStack DB TO SQLite + +When handling mutations (onInsert/onUpdate/onDelete), **serialize TOutput to SQLite types**: + +```typescript +const collection = createCollection( + powerSyncCollectionOptions({ + database: db, + table: APP_SCHEMA.props.documents, + schema: z.object({ + id: z.string(), + name: z.string(), + created_at: z.string().transform(val => new Date(val)) + }), + + // In your mutation handler, serialize before writing to SQLite + onInsert: async ({ transaction }) => { + const mutation = transaction.mutations[0] + const item = mutation.modified // This is TOutput: { created_at: Date } + + // Serialize to SQLite types + const sqliteData = { + id: item.id, + name: item.name, + created_at: item.created_at.toISOString() // Date β†’ string + } + + // Write to SQLite + await db.execute( + "INSERT INTO documents (id, name, created_at) VALUES (?, ?, ?)", + [sqliteData.id, sqliteData.name, sqliteData.created_at] + ) + + // Add to upload queue + await uploadQueue.enqueue(mutation) + } + }) +) +``` 
+ +--- + +## Real Examples from Other Integrations + +All existing integrations follow this pattern. Let me show you: + +### Example 1: Manual Sync (packages/query-db-collection/src/manual-sync.ts) + +```typescript +// Line 145-150 +case 'insert': { + const resolved = ctx.collection.validateData(op.data, 'insert') + ctx.write({ + type: 'insert', + value: resolved // TOutput (with Date objects if schema transforms) + }) + break +} +``` + +The manual sync validates data to get TOutput, then writes it to the collection. + +### Example 2: RxDB Integration (packages/rxdb-db-collection/src/rxdb.ts) + +```typescript +// Line 189-191 +write({ + type: 'insert', + value: stripRxdbFields(clone(d)) // Application-level objects +}) +``` + +RxDB handles its own serialization internally. By the time it reaches `write()`, it's already in application types. + +### Example 3: Schema Validation Test (packages/db/tests/collection-schema.test.ts) + +This test demonstrates exactly what you want to do: + +```typescript +// Line 14-43 +const userSchema = z.object({ + id: z.string(), + name: z.string(), + email: z.string().email(), + created_at: z.string().transform((val) => new Date(val)), // string β†’ Date + updated_at: z.string().transform((val) => new Date(val)), +}) + +const collection = createCollection({ + getKey: (item) => item.id, + schema: userSchema, + sync: { sync: () => {} }, +}) + +// Test insert validation +const insertData = { + id: '1', + name: 'John Doe', + email: 'john@example.com', + created_at: '2023-01-01T00:00:00.000Z', // Input: string + updated_at: '2023-01-01T00:00:00.000Z', +} + +const validatedInsert = collection.validateData(insertData, 'insert') + +// Verify that the data has been transformed +expect(validatedInsert.created_at).toBeInstanceOf(Date) // βœ… It's a Date! +expect(validatedInsert.updated_at).toBeInstanceOf(Date) +``` + +The schema successfully transforms strings to Dates, and that's what gets stored in the collection. 
+ +--- + +## Addressing Your Specific Concerns + +> **"If we want to return a Date when reading, TOutput should be Date."** + +βœ… Correct! TOutput should be Date. + +> **"Developers must provide an ISO string when insertingβ€”this is not ideal, but manageable."** + +βœ… This is actually perfect! Users provide strings (TInput), schema transforms to Date (TOutput). + +```typescript +// User-friendly API +collection.insert({ + id: "1", + created_at: "2023-01-01T00:00:00.000Z" // String is fine +}) + +// Gets transformed to Date automatically +collection.get("1").created_at // Returns: Date object +``` + +> **"Incoming sync data is a string; we need to validate/convert it before writing. The schema can help, but handling validation failures is tricky."** + +βœ… Use `collection.validateData()` before calling `write()`: + +```typescript +sync: ({ write, begin, commit }) => { + begin() + for (const sqliteRow of sqliteData) { + try { + // This transforms string β†’ Date using the schema + const validated = collection.validateData(sqliteRow, 'insert') + write({ type: 'insert', value: validated }) + } catch (error) { + if (error instanceof SchemaValidationError) { + // Handle validation failure - maybe skip this row, log it, etc. + console.error('Invalid data from SQLite:', error.issues) + continue + } + throw error + } + } + commit() +} +``` + +> **"The main blocker is PendingMutations: we can't directly write a Date (or other arbitrary types) from the mutation to SQLite"** + +βœ… This is where you serialize! 
PendingMutations have Date objects, but you serialize when writing to SQLite: + +```typescript +onInsert: async ({ transaction }) => { + const mutation = transaction.mutations[0] + const item = mutation.modified // TOutput: { created_at: Date } + + // Serialize just for SQLite + const sqliteRow = { + ...item, + created_at: item.created_at.toISOString() // Date β†’ string + } + + await db.execute("INSERT INTO ...", sqliteRow) +} +``` + +> **"The schema doesn't convert it back to TInput."** + +βœ… Correct - and it doesn't need to! **You handle serialization in your integration layer**, not in the schema. The schema is for user-facing transformations, not storage serialization. + +--- + +## Recommended Architecture + +Here's how I'd structure your PowerSync integration: + +```typescript +// Helper function to serialize TOutput β†’ SQLite +function serializeForSQLite(item: TOutput): SQLiteRow { + return { + ...item, + created_at: item.created_at instanceof Date + ? item.created_at.toISOString() + : item.created_at, + // Handle other type conversions as needed + } +} + +// Helper function to deserialize SQLite β†’ TOutput +function deserializeFromSQLite(row: SQLiteRow, collection: Collection): TOutput { + // Use the collection's schema to transform + return collection.validateData(row, 'insert') +} + +export function powerSyncCollectionOptions(config) { + return { + ...config, + + sync: { + sync: ({ write, begin, commit, collection }) => { + // Read from SQLite + const rows = config.database.execute(...) 
+ + begin() + for (const row of rows) { + // Deserialize: SQLite β†’ TOutput + const deserialized = deserializeFromSQLite(row, collection) + write({ type: 'insert', value: deserialized }) + } + commit() + } + }, + + onInsert: async ({ transaction }) => { + const item = transaction.mutations[0].modified // TOutput + + // Serialize: TOutput β†’ SQLite + const sqliteRow = serializeForSQLite(item) + + // Write to SQLite + await config.database.execute( + "INSERT INTO ...", + sqliteRow + ) + }, + + // Similar for onUpdate, onDelete + } +} +``` + +--- + +## Summary + +You asked: +> "Let me know if you have suggestions/feedback or if I've misunderstood any part of the TanStackDB schema handling!" + +**Key points:** + +1. βœ… **TOutput should be rich JavaScript types** (Date, etc.) - this is what users see +2. βœ… **TInput is what users provide** for mutations (can be strings that transform to Date) +3. βœ… **Your integration handles serialization**, not the schema: + - When syncing TO collection: `SQLite types` β†’ (deserialize) β†’ `TOutput` β†’ `write()` + - When persisting FROM collection: `mutation.modified` (TOutput) β†’ (serialize) β†’ `SQLite types` +4. βœ… **PendingMutations store TOutput** - you serialize when writing to SQLite +5. βœ… **Use `collection.validateData()`** to transform SQLite data before calling `write()` + +The limitation you described is self-imposed! You **can** support arbitrary TOutput transformations - you just need to handle serialization in your PowerSync adapter layer, similar to how RxDB and Electric do it. 
+ +--- + +## Example: Full Date Support + +Here's a complete example showing Date support: + +```typescript +// User-friendly schema +const schema = z.object({ + id: z.string(), + name: z.string(), + created_at: z.string().transform(val => new Date(val)), // TInput: string, TOutput: Date + updated_at: z.string().transform(val => new Date(val)), +}) + +const collection = createCollection( + powerSyncCollectionOptions({ + database: powerSyncDB, + table: APP_SCHEMA.props.documents, + schema, + + sync: { + sync: ({ write, begin, commit, collection }) => { + // Sync FROM SQLite + powerSyncDB.watch('documents', (changes) => { + begin() + for (const change of changes) { + // SQLite row: { id: "1", created_at: "2023-01-01T00:00:00.000Z" } + // validateData transforms string β†’ Date + const transformed = collection.validateData(change, 'insert') + // Now: { id: "1", created_at: Date } + + write({ type: 'insert', value: transformed }) + } + commit() + }) + } + }, + + onInsert: async ({ transaction }) => { + const item = transaction.mutations[0].modified + // item.created_at is a Date here (TOutput) + + // Serialize for SQLite + const sqliteRow = { + ...item, + created_at: item.created_at.toISOString(), // Date β†’ string + updated_at: item.updated_at.toISOString(), + } + + // Write to SQLite + await powerSyncDB.execute( + "INSERT INTO documents (id, name, created_at, updated_at) VALUES (?, ?, ?, ?)", + [sqliteRow.id, sqliteRow.name, sqliteRow.created_at, sqliteRow.updated_at] + ) + } + }) +) + +// Users get a great API: +collection.insert({ + id: "1", + name: "My Doc", + created_at: "2023-01-01T00:00:00.000Z" // String input is fine +}) + +// Reads return Date objects: +const doc = collection.get("1") +console.log(doc.created_at instanceof Date) // true βœ… +console.log(doc.created_at.getFullYear()) // 2023 +``` + +--- + +Hope this clarifies things! 
Your POC looks great, and with this approach you can provide the best DX (Date objects, custom types) while still syncing through SQLite. Let me know if you have questions! From a3fd3c0658cd1c731e15d6a2dba2452737fd4aa8 Mon Sep 17 00:00:00 2001 From: Claude Date: Fri, 31 Oct 2025 15:28:49 +0000 Subject: [PATCH 2/9] Add comprehensive schema documentation proposal MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This proposal addresses the lack of documentation around TInput/TOutput schema types and transformations. It includes: - Complete content outline for new schemas.md guide - Data flow diagrams and examples - Guidance for both app developers and integration authors - Common patterns for Date handling, defaults, and type conversions - Updates to existing docs (overview, mutations, collection-options-creator) The proposal directly addresses confusion like what the PowerSync team experienced regarding how to handle type transformations and serialization in integrations. 
πŸ€– Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- schema-documentation-proposal.md | 650 +++++++++++++++++++++++++++++++ 1 file changed, 650 insertions(+) create mode 100644 schema-documentation-proposal.md diff --git a/schema-documentation-proposal.md b/schema-documentation-proposal.md new file mode 100644 index 000000000..fde7946e8 --- /dev/null +++ b/schema-documentation-proposal.md @@ -0,0 +1,650 @@ +# Schema Documentation Proposal + +## Current State Analysis + +### What's Currently Documented + +**overview.md (lines 144-154):** +- Basic mention that collections support schemas (StandardSchema compatible) +- States schemas are used for "client-side validation of optimistic mutations" +- Notes you can't provide both a schema and an explicit type +- Shows basic usage: `schema: todoSchema` + +**mutations.md (lines 495-560):** +- Shows schema validation for **action parameters** (validating inputs to `createOptimisticAction`) +- Does NOT cover TInput/TOutput transformations for collection data + +### Critical Gaps + +1. ❌ **No explanation of TInput vs TOutput** - The core concept is missing +2. ❌ **No transformation examples** - No `.transform()` usage shown +3. ❌ **No default value examples** - No `.default()` usage shown +4. ❌ **No data flow explanation** - Where does validation happen in the system? +5. ❌ **No type conversion patterns** - Common patterns like Date handling, enums, computed fields +6. ❌ **No integration guidance** - How integrations should handle serialization/deserialization +7. 
❌ **No best practices** - When to use schemas, what to transform, performance considerations + +--- + +## Proposed Solution: New Dedicated Guide + +**Create: `docs/guides/schemas.md`** + +This deserves its own guide because: +- It's a substantial topic spanning mutations, queries, and sync +- It's relevant to all collection types +- It affects integration authors and app developers differently +- Discoverability is important for this foundational concept + +--- + +## Proposed Content Structure + +### 1. Introduction & Core Concepts (5-10 min read) + +**Title:** "Schema Validation and Type Transformations" + +**Opening:** +- What schemas do in TanStack DB +- Why you should use them (type safety, runtime validation, data transformation) +- Overview of StandardSchema compatibility (Zod, Valibot, ArkType, Effect) + +**Core Concept: TInput vs TOutput** +```typescript +// Example showing the concept clearly +const todoSchema = z.object({ + id: z.string(), + text: z.string(), + completed: z.boolean(), + created_at: z.string().transform(val => new Date(val)), // TInput: string, TOutput: Date + priority: z.number().default(0) // TInput: optional, TOutput: always present +}) + +// TInput = { id: string, text: string, completed: boolean, created_at: string, priority?: number } +// TOutput = { id: string, text: string, completed: boolean, created_at: Date, priority: number } +``` + +**Explain:** +- TInput: What users provide when calling `insert()` or `update()` +- TOutput: What gets stored in the collection and returned from queries +- Schema transforms TInput β†’ TOutput at the collection boundary + +--- + +### 2. 
The Data Flow (visual diagram + explanation) + +**Include a diagram showing:** + +``` +β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” +β”‚ User's Code β”‚ +β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ + β”‚ TInput (strings, partial data) + ↓ +β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” +β”‚ collection.insert(data) β”‚ +β”‚ ↓ β”‚ +β”‚ Schema Validation & Transformation β”‚ +β”‚ (TInput β†’ TOutput) β”‚ +β”‚ ↓ β”‚ +β”‚ - Validate types and constraints β”‚ +β”‚ - Apply transformations (.transform()) β”‚ +β”‚ - Apply defaults (.default()) β”‚ +β”‚ - Convert types (string β†’ Date, etc.) 
β”‚ +β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ + β”‚ TOutput (Dates, complete data) + ↓ +β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” +β”‚ Collection Storage β”‚ +β”‚ (stores as TOutput) β”‚ +β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ + β”‚ TOutput + ↓ +β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” +β”‚ Live Queries / Reads β”‚ +β”‚ (returns TOutput) β”‚ +β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ +``` + +**Key Points:** +1. Validation happens at the **collection boundary** (during `insert()`, `update()`, and sync writes) +2. **Everything stored in the collection is TOutput** +3. **Everything read from the collection is TOutput** +4. PendingMutations also store TOutput + +--- + +### 3. Transformation Examples + +**3.1 Type Conversions** + +**Example: String to Date** +```typescript +const eventSchema = z.object({ + id: z.string(), + name: z.string(), + start_time: z.string().transform(val => new Date(val)) +}) + +const collection = createCollection({ + schema: eventSchema, + // ... 
+}) + +// User provides string +collection.insert({ + id: "1", + name: "Conference", + start_time: "2024-01-01T10:00:00Z" // TInput: string +}) + +// Collection stores Date +const event = collection.get("1") +console.log(event.start_time.getFullYear()) // TOutput: Date +``` + +**Example: Number/String to Enum** +```typescript +const statusSchema = z.object({ + id: z.string(), + status: z.union([ + z.literal('draft'), + z.literal('published'), + z.literal('archived') + ]).default('draft') +}) +``` + +**Example: Computed Fields** +```typescript +const userSchema = z.object({ + id: z.string(), + first_name: z.string(), + last_name: z.string(), +}).transform(data => ({ + ...data, + full_name: `${data.first_name} ${data.last_name}` // Computed during insert +})) +``` + +**3.2 Default Values** + +```typescript +const todoSchema = z.object({ + id: z.string(), + text: z.string(), + completed: z.boolean().default(false), + created_at: z.date().default(() => new Date()), + priority: z.number().default(0), + tags: z.array(z.string()).default([]) +}) + +// User can omit fields with defaults +collection.insert({ + id: "1", + text: "Buy groceries" + // completed, created_at, priority, tags will be added automatically +}) +``` + +**3.3 Input Validation & Constraints** + +```typescript +const productSchema = z.object({ + id: z.string(), + name: z.string().min(3, "Name must be at least 3 characters"), + price: z.number().positive("Price must be positive"), + email: z.string().email("Invalid email address"), + age: z.number().int().min(18).max(120) +}) + +// This will throw SchemaValidationError +collection.insert({ + id: "1", + name: "A", // Too short + price: -10, // Negative + email: "not-an-email", // Invalid format + age: 200 // Out of range +}) +``` + +--- + +### 4. Handling Updates with Schemas + +**The Challenge with Updates:** +When updating, existing data is already TOutput (e.g., Date objects), but users provide TInput (strings). You need to handle both. 
+ +**Pattern: Union Types** +```typescript +const todoSchema = z.object({ + id: z.string(), + text: z.string(), + created_at: z.union([z.string(), z.date()]) + .transform(val => typeof val === 'string' ? new Date(val) : val), + updated_at: z.union([z.string(), z.date()]) + .transform(val => typeof val === 'string' ? new Date(val) : val) +}) + +const collection = createCollection({ + schema: todoSchema, + // ... +}) + +// Insert with string (TInput) +collection.insert({ + id: "1", + text: "Task", + created_at: "2024-01-01T00:00:00Z" // string +}) + +// Update merges with existing data (which has Date) +collection.update("1", (draft) => { + draft.updated_at = new Date() // Can use Date OR string + // created_at is already a Date from the insert +}) +``` + +**Why This Works:** +1. During insert: String input β†’ validated β†’ transformed to Date β†’ stored as TOutput +2. During update: Schema merges partial update with existing TOutput, validates the merged result +3. Union type accepts both string (from user input) and Date (from existing data) + +--- + +### 5. Integration-Specific Guidance + +**5.1 For App Developers** + +**When to Use Schemas:** +- βœ… Always use schemas when available - they provide type safety and runtime validation +- βœ… Define rich TOutput types (Date objects, enums, computed fields) +- βœ… Let the schema handle transformations rather than manual conversion + +**Example: Rich Types in TOutput** +```typescript +// Good: Let users provide strings, store as Date +const schema = z.object({ + created_at: z.string().transform(val => new Date(val)) +}) + +// Bad: Forcing users to provide Date objects +const schema = z.object({ + created_at: z.date() // Users must call `new Date()` themselves +}) +``` + +**5.2 For Integration Authors (Electric, PowerSync, RxDB, etc.)** + +**Key Principle:** Your integration layer handles serialization between storage format and TOutput. + +```typescript +// Integration Flow + +// 1. 
Syncing FROM storage TO TanStack DB +sync: ({ write, collection }) => { + // Read from storage (e.g., SQLite) + const sqliteRow = { id: "1", created_at: "2024-01-01T00:00:00Z" } + + // Deserialize using schema: SQLite format β†’ TOutput + const transformed = collection.validateData(sqliteRow, 'insert') + // Result: { id: "1", created_at: Date } + + // Write TOutput to collection + write({ type: 'insert', value: transformed }) +} + +// 2. Persisting FROM TanStack DB TO storage +onInsert: async ({ transaction }) => { + const item = transaction.mutations[0].modified // This is TOutput + + // Serialize: TOutput β†’ storage format + const sqliteRow = { + ...item, + created_at: item.created_at.toISOString() // Date β†’ string + } + + // Write to storage + await db.execute("INSERT INTO ...", sqliteRow) +} +``` + +**Important:** +- βœ… Call `collection.validateData()` when syncing data INTO the collection +- βœ… Manually serialize TOutput when persisting data FROM the collection +- ❌ Don't constrain TOutput to match storage types +- ❌ Don't skip schema validation during sync + +--- + +### 6. Common Patterns & Best Practices + +**6.1 Date Handling** +```typescript +// Pattern: Accept strings, store as Date +const schema = z.object({ + timestamp: z.string().transform(val => new Date(val)) +}) + +// Pattern: Accept both for updates +const schema = z.object({ + timestamp: z.union([z.string(), z.date()]) + .transform(val => typeof val === 'string' ? 
new Date(val) : val) +}) +``` + +**6.2 Timestamps with Defaults** +```typescript +const schema = z.object({ + id: z.string(), + created_at: z.date().default(() => new Date()), + updated_at: z.date().default(() => new Date()) +}) + +// Usage +collection.insert({ + id: "1" + // timestamps added automatically +}) +``` + +**6.3 Type-Safe Enums** +```typescript +const schema = z.object({ + status: z.enum(['draft', 'published', 'archived']).default('draft') +}) +``` + +**6.4 Nullable/Optional Fields** +```typescript +const schema = z.object({ + id: z.string(), + notes: z.string().optional(), // TInput: string | undefined, TOutput: string | undefined + deleted_at: z.date().nullable().default(null) // TInput: Date | null, TOutput: Date | null +}) +``` + +**6.5 Arrays with Defaults** +```typescript +const schema = z.object({ + tags: z.array(z.string()).default([]), + metadata: z.record(z.unknown()).default({}) +}) +``` + +--- + +### 7. Performance Considerations + +**Schema Validation Cost:** +- Schema validation runs on every `insert()` and `update()` +- Also runs during sync when calling `validateData()` +- For high-frequency updates, consider simpler schemas + +**Optimization Tips:** +```typescript +// Avoid expensive transforms in hot paths +const schema = z.object({ + id: z.string(), + data: z.string().transform(val => JSON.parse(val)) // Can be slow +}) + +// Better: Parse only when needed +const schema = z.object({ + id: z.string(), + data: z.string() // Store as string, parse in components +}) +``` + +--- + +### 8. 
Error Handling + +**Schema Validation Errors:** +```typescript +import { SchemaValidationError } from '@tanstack/db' + +try { + collection.insert({ + id: "1", + email: "invalid-email", + age: -5 + }) +} catch (error) { + if (error instanceof SchemaValidationError) { + console.log(error.type) // 'insert' or 'update' + console.log(error.issues) // Array of validation issues + + error.issues.forEach(issue => { + console.log(issue.path) // ['email'] or ['age'] + console.log(issue.message) // "Invalid email address" + }) + } +} +``` + +**In Sync Handlers:** +```typescript +sync: ({ write, begin, commit }) => { + begin() + for (const row of sqliteData) { + try { + const validated = collection.validateData(row, 'insert') + write({ type: 'insert', value: validated }) + } catch (error) { + if (error instanceof SchemaValidationError) { + // Handle invalid data from storage + console.error('Invalid data in storage:', error.issues) + continue // Skip this row + } + throw error + } + } + commit() +} +``` + +--- + +### 9. 
Complete Working Examples + +**Example 1: Todo App with Rich Types** +```typescript +const todoSchema = z.object({ + id: z.string(), + text: z.string().min(1, "Todo text cannot be empty"), + completed: z.boolean().default(false), + priority: z.enum(['low', 'medium', 'high']).default('medium'), + due_date: z.string().transform(val => new Date(val)).optional(), + created_at: z.date().default(() => new Date()), + tags: z.array(z.string()).default([]) +}) + +const todoCollection = createCollection( + queryCollectionOptions({ + queryKey: ['todos'], + queryFn: async () => api.todos.getAll(), + getKey: (item) => item.id, + schema: todoSchema, + onInsert: async ({ transaction }) => { + const todo = transaction.mutations[0].modified // TOutput + + // Serialize for API + await api.todos.create({ + ...todo, + due_date: todo.due_date?.toISOString(), // Date β†’ string + created_at: todo.created_at.toISOString() + }) + } + }) +) + +// Usage - users provide simple inputs +todoCollection.insert({ + id: crypto.randomUUID(), + text: "Buy groceries", + due_date: "2024-12-31T23:59:59Z" + // completed, priority, created_at, tags filled automatically +}) + +// Reading returns rich types +const todo = todoCollection.get(id) +console.log(todo.due_date.getTime()) // It's a Date! 
+console.log(todo.priority) // Type-safe enum +``` + +**Example 2: E-commerce Product with Computed Fields** +```typescript +const productSchema = z.object({ + id: z.string(), + name: z.string(), + base_price: z.number().positive(), + tax_rate: z.number().min(0).max(1).default(0.1), + discount_percent: z.number().min(0).max(100).default(0) +}).transform(data => ({ + ...data, + // Computed field + final_price: data.base_price * (1 + data.tax_rate) * (1 - data.discount_percent / 100) +})) + +// User provides base data +collection.insert({ + id: "1", + name: "Widget", + base_price: 100, + discount_percent: 20 + // tax_rate defaults to 0.1 +}) + +// Reading returns computed field +const product = collection.get("1") +console.log(product.final_price) // 88 (100 * 1.1 * 0.8) +``` + +--- + +### 10. Related Topics + +**See Also:** +- [Mutations Guide](./mutations.md) - Using schemas with mutation handlers +- [Error Handling Guide](./error-handling.md) - Handling SchemaValidationError +- [Creating Collection Options](./collection-options-creator.md) - Integration authors: handling schemas in custom integrations +- [StandardSchema Specification](https://standardschema.dev) - Full schema specification + +--- + +## Changes to Existing Docs + +### 1. Update `overview.md` (lines 144-154) + +**Replace:** +```markdown +#### Collection schemas + +All collections optionally (though strongly recommended) support adding a `schema`. + +If provided, this must be a [Standard Schema](https://standardschema.dev) compatible schema instance, such as a [Zod](https://zod.dev) or [Effect](https://effect.website/docs/schema/introduction/) schema. + +The collection will use the schema to do client-side validation of optimistic mutations. + +The collection will use the schema for its type so if you provide a schema, you can't also pass in an explicit +type (e.g. `createCollection<Todo>()`).
+```

**With:**
```markdown
#### Collection schemas

All collections optionally (though strongly recommended) support adding a `schema`.

If provided, this must be a [Standard Schema](https://standardschema.dev) compatible schema instance, such as [Zod](https://zod.dev), [Valibot](https://valibot.dev), [ArkType](https://arktype.io), or [Effect](https://effect.website/docs/schema/introduction/) schemas.

Schemas provide three key benefits:

1. **Runtime validation**: Ensures data meets your constraints before entering the collection
2. **Type transformations**: Convert input types (strings) to rich output types (Date objects)
3. **Default values**: Automatically populate missing fields

The collection will use the schema for its type, so if you provide a schema, you can't also pass in an explicit
type parameter (e.g., `createCollection<Todo>()`).

For a comprehensive guide on schema validation and type transformations, see the [Schemas guide](../guides/schemas.md).
```

### 2. Add to `mutations.md` (after line 154)

Add a note in the mutation handlers section:

```markdown
> [!TIP]
> Schemas automatically validate and transform data during mutations. For example, you can use schemas to convert string inputs to Date objects. See the [Schemas guide](./schemas.md) for details on schema validation and type transformations.
```

### 3. Update `collection-options-creator.md` (after line 66)

Add a section on schemas:

```markdown
### 3. 
Schema Handling + +When implementing a collection options creator for a sync engine, you must handle schema transformations correctly: + +```typescript +// When syncing FROM storage TO TanStack DB +sync: ({ write, collection }) => { + const storageData = await fetchFromStorage() + + // Deserialize: storage format β†’ TOutput + const transformed = collection.validateData(storageData, 'insert') + + // Write TOutput to collection + write({ type: 'insert', value: transformed }) +} + +// When persisting FROM TanStack DB TO storage +onInsert: async ({ transaction }) => { + const item = transaction.mutations[0].modified // TOutput + + // Serialize: TOutput β†’ storage format + const serialized = serializeForStorage(item) + + // Write to storage + await storage.write(serialized) +} +``` + +**Key principles:** +- Your integration layer handles serialization between storage format and TOutput +- Always call `collection.validateData()` when syncing data INTO the collection +- Manually serialize when persisting data FROM the collection to storage +- Don't constrain user schemas to match storage types + +For a comprehensive guide, see [Schemas guide](./schemas.md#integration-specific-guidance). +``` + +--- + +## Implementation Checklist + +- [ ] Create `docs/guides/schemas.md` with the content above +- [ ] Update `overview.md` collection schemas section +- [ ] Add schema tip to `mutations.md` +- [ ] Add schema handling section to `collection-options-creator.md` +- [ ] Add link to schemas guide in docs navigation +- [ ] Review and test all code examples +- [ ] Get feedback from integration authors (Electric, PowerSync, TrailBase teams) + +--- + +## Success Metrics + +After implementation, developers should be able to: + +1. βœ… Explain the difference between TInput and TOutput +2. βœ… Use schema transformations to convert types (e.g., string β†’ Date) +3. βœ… Apply default values in schemas +4. βœ… Handle both input and existing data in update schemas +5. 
βœ… Understand where schema validation happens in the system +6. βœ… (Integration authors) Correctly implement serialization/deserialization + +This should significantly reduce confusion like the PowerSync team experienced. From 2ac0f88b304b88a5389012783090944b1d07e944 Mon Sep 17 00:00:00 2001 From: Claude Date: Fri, 31 Oct 2025 15:35:29 +0000 Subject: [PATCH 3/9] Add refined schema documentation plan based on deep investigation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After investigating all existing docs (overview, mutations, error-handling, live-queries, collection-options-creator) and examples, created a refined plan that addresses: KEY FINDING: Two distinct type conversion mechanisms 1. Integration-level parsing (storage format ↔ in-memory format) 2. Schema validation/transformation (TInput β†’ TOutput for mutations) The plan includes: - Analysis of what's currently documented (and gaps) - Comprehensive schemas.md guide structure (11 sections) - Specific updates to 5 existing docs with exact content - Separate guidance for app developers vs integration authors - Clear distinction between integration parsing and schema validation - Complete working examples and best practices - Implementation order and success criteria This directly addresses the PowerSync confusion about TInput/TOutput and provides clear guidance for both audiences. πŸ€– Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- schema-documentation-plan-v2.md | 596 ++++++++++++++++++++++++++++++++ 1 file changed, 596 insertions(+) create mode 100644 schema-documentation-plan-v2.md diff --git a/schema-documentation-plan-v2.md b/schema-documentation-plan-v2.md new file mode 100644 index 000000000..a78b5a8db --- /dev/null +++ b/schema-documentation-plan-v2.md @@ -0,0 +1,596 @@ +# Schema Documentation Plan - v2 (Refined After Investigation) + +## Investigation Summary + +### What's Currently Documented + +**1. 
overview.md (lines 144-154)** +- βœ… Mentions schemas are optional but recommended +- βœ… Lists supported schema libraries (Zod, Effect) +- βœ… Says schemas do "client-side validation" +- ❌ No explanation of TInput/TOutput +- ❌ No transformation examples +- ❌ No mention of data flow + +**2. mutations.md (lines 495-560)** +- βœ… Shows Zod for **action parameter validation** +- ❌ Does NOT cover TInput/TOutput for collection data +- ❌ Does NOT show schema transformations + +**3. error-handling.md (lines 25-46)** +- βœ… Shows SchemaValidationError basics +- βœ… Shows error properties (type, issues, message) +- ❌ No explanation of when/why validation happens +- ❌ No transformation examples + +**4. live-queries.md** +- βœ… Mentions schema is optional for createLiveQueryCollection (line 95) +- ❌ No explanation of schema behavior +- ❌ Only says "result types are automatically inferred" + +**5. collection-options-creator.md (lines 174-220)** +- βœ… Has section on "Data Parsing and Type Conversion" +- βœ… Shows integration-specific `parse`/`serialize` functions +- ❌ This is DIFFERENT from schema validation +- ❌ Doesn't explain the relationship between the two + +### Key Finding: Two Distinct Mechanisms + +From examples and codebase, there are **TWO separate type conversion mechanisms**: + +1. **Integration-Level Parsing** (e.g., TrailBase's `parse/serialize`, Electric's `parser`) + - Purpose: Convert between storage format and in-memory format + - Layer: Sync layer (happens during `write()`) + - Example: Unix timestamp β†’ Date, WKB β†’ GeoJSON + - Used by: Integration authors + +2. 
**Schema Validation/Transformation** (the `schema` property) + - Purpose: Validate user input and transform TInput β†’ TOutput + - Layer: Mutation layer (happens during `insert()`/`update()`) + - Example: ISO string β†’ Date, applying defaults, validation + - Used by: App developers + +**These are complementary but serve different purposes!** + +--- + +## Proposed Documentation Strategy + +### Phase 1: Create New Comprehensive Guide + +**File:** `docs/guides/schemas.md` + +**Why a new guide?** +- Schemas affect mutations, queries, sync, AND error handling +- Content is substantial (~1500 lines with examples) +- Needs to serve both app developers AND integration authors +- Deserves prominent discoverability + +**Target Audiences:** +1. **App Developers** (80% of content) + - Understanding TInput/TOutput + - Using transformations + - Handling updates + - Error handling + - Best practices + +2. **Integration Authors** (20% of content) + - How schemas interact with sync + - When to use integration parsing vs schemas + - Calling `validateData()` correctly + - Handling serialization + +--- + +### Phase 2: Update Existing Docs + +#### 2.1 Update `overview.md` (lines 144-154) + +**Current:** +```markdown +#### Collection schemas + +All collections optionally (though strongly recommended) support adding a `schema`. + +If provided, this must be a [Standard Schema](https://standardschema.dev) compatible schema instance, such as a [Zod](https://zod.dev) or [Effect](https://effect.website/docs/schema/introduction/) schema. + +The collection will use the schema to do client-side validation of optimistic mutations. +``` + +**Replace with:** +```markdown +#### Collection schemas + +All collections optionally (though strongly recommended) support adding a `schema`. 
+

If provided, this must be a [Standard Schema](https://standardschema.dev) compatible schema instance, such as [Zod](https://zod.dev), [Valibot](https://valibot.dev), [ArkType](https://arktype.io), or [Effect](https://effect.website/docs/schema/introduction/).

**What schemas do:**

1. **Runtime validation** - Ensures data meets your constraints before entering the collection
2. **Type transformations** - Convert input types to rich output types (e.g., string β†’ Date)
3. **Default values** - Automatically populate missing fields
4. **Type safety** - Infer TypeScript types from your schema

**Example:**
```typescript
const todoSchema = z.object({
  id: z.string(),
  text: z.string(),
  completed: z.boolean().default(false),
  created_at: z.string().transform(val => new Date(val)), // string β†’ Date
  priority: z.number().default(0)
})

const collection = createCollection(
  queryCollectionOptions({
    schema: todoSchema,
    // ...
  })
)

// Users provide simple inputs
collection.insert({
  id: "1",
  text: "Buy groceries",
  created_at: "2024-01-01T00:00:00Z" // string
  // completed and priority filled automatically
})

// Collection stores and returns rich types
const todo = collection.get("1")
console.log(todo.created_at.getFullYear()) // It's a Date!
console.log(todo.completed) // false (default)
```

The collection will use the schema for its type inference. If you provide a schema, you cannot also pass an explicit type parameter (e.g., `createCollection<Todo>()`).

**Learn more:** See the [Schemas guide](../guides/schemas.md) for comprehensive documentation on schema validation, type transformations, and best practices.
```

#### 2.2 Add to `mutations.md` (after Operation Handlers section, ~line 394)

Add a new section:

```markdown
### Schema Validation in Mutation Handlers

When a schema is configured, TanStack DB automatically validates and transforms data during mutations. 
The mutation handlers receive the **transformed data** (TOutput), not the raw input. + +```typescript +const todoSchema = z.object({ + id: z.string(), + text: z.string(), + created_at: z.string().transform(val => new Date(val)) // TInput: string, TOutput: Date +}) + +const collection = createCollection({ + schema: todoSchema, + onInsert: async ({ transaction }) => { + const item = transaction.mutations[0].modified + + // item.created_at is already a Date object (TOutput) + console.log(item.created_at instanceof Date) // true + + // If your API needs a string, serialize it + await api.todos.create({ + ...item, + created_at: item.created_at.toISOString() // Date β†’ string + }) + } +}) + +// User provides string (TInput) +collection.insert({ + id: "1", + text: "Task", + created_at: "2024-01-01T00:00:00Z" +}) +``` + +**Key points:** +- Schema validation happens **before** mutation handlers are called +- Handlers receive **TOutput** (transformed data) +- If your backend needs a different format, serialize in the handler +- Schema validation errors throw `SchemaValidationError` before handlers run + +For comprehensive documentation on schema validation and transformations, see the [Schemas guide](./schemas.md). +``` + +#### 2.3 Update `error-handling.md` (lines 25-46) + +**Current section is good but add after line 46:** + +```markdown +**When schema validation occurs:** + +Schema validation happens at the **collection boundary** when data enters or is modified: + +1. **During inserts** - When `collection.insert()` is called +2. **During updates** - When `collection.update()` is called +3. 
**During sync writes** - When integration calls `collection.validateData()` + +The schema transforms **TInput** (user-provided data) into **TOutput** (stored data): + +```typescript +const schema = z.object({ + created_at: z.string().transform(val => new Date(val)) + // TInput: string, TOutput: Date +}) + +// Validation happens here ↓ +collection.insert({ + created_at: "2024-01-01" // TInput: string +}) +// If successful, stores: { created_at: Date } // TOutput: Date +``` + +For more details on schema validation and type transformations, see the [Schemas guide](./schemas.md). +``` + +#### 2.4 Update `collection-options-creator.md` (after line 220) + +**Add a new section after "Data Parsing and Type Conversion":** + +```markdown +### Integration Parsing vs Schema Validation + +Integration authors need to understand the **two distinct type conversion mechanisms**: + +#### 1. Integration-Level Parsing (`parse`/`serialize` or `parser`) + +This is **your responsibility** as an integration author. It converts between storage format and in-memory format. + +```typescript +// Example: TrailBase stores timestamps as Unix seconds +export function trailbaseCollectionOptions(config) { + return { + parse: { + created_at: (ts: number) => new Date(ts * 1000) // Unix timestamp β†’ Date + }, + serialize: { + created_at: (date: Date) => Math.floor(date.valueOf() / 1000) // Date β†’ Unix timestamp + }, + // This happens during sync write() + } +} +``` + +**When to use:** When your storage layer uses different types than TanStack DB (e.g., Unix timestamps, WKB geometry, JSON strings). + +**Where it happens:** In the sync layer, during `write()` operations. + +#### 2. Schema Validation (the `schema` property) + +This is **the user's choice**. They can optionally provide a schema that validates and transforms data during mutations. 
+ +```typescript +// User-defined schema +const todoSchema = z.object({ + id: z.string(), + created_at: z.string().transform(val => new Date(val)) // string β†’ Date +}) + +const collection = createCollection( + myCollectionOptions({ + schema: todoSchema, // User provides this + // ... + }) +) +``` + +**When to use (for users):** For input validation, transformations, and defaults during mutations. + +**Where it happens:** At the mutation layer, during `insert()`/`update()`. + +#### How They Work Together + +```typescript +// 1. User calls insert with TInput +collection.insert({ + id: "1", + created_at: "2024-01-01T00:00:00Z" // string +}) + +// 2. Schema validates/transforms: string β†’ Date (if schema is provided) +// Result: { id: "1", created_at: Date } // TOutput + +// 3. Your onInsert handler receives TOutput +onInsert: async ({ transaction }) => { + const item = transaction.mutations[0].modified // { created_at: Date } + + // 4. Serialize for your storage layer + const storageFormat = { + ...item, + created_at: Math.floor(item.created_at.valueOf() / 1000) // Date β†’ Unix timestamp + } + + // 5. Write to storage + await storage.write(storageFormat) +} + +// 6. When syncing back FROM storage: +sync: ({ write, collection }) => { + const storageRow = { id: "1", created_at: 1704067200 } // Unix timestamp + + // 7. Parse from storage format + const parsed = { + ...storageRow, + created_at: new Date(storageRow.created_at * 1000) // Unix β†’ Date + } + + // 8. Optionally validate with schema + const validated = collection.validateData(parsed, 'insert') + + // 9. Write to collection as TOutput + write({ type: 'insert', value: validated }) +} +``` + +#### Best Practices for Integration Authors + +1. **Always call `collection.validateData()`** when syncing data INTO the collection +2. **Serialize in mutation handlers** when persisting data FROM the collection +3. **Don't constrain user schemas** - let users define rich TOutput types +4. 
**Document your parsing requirements** - explain what formats your storage uses +5. **Provide good TypeScript types** - use generics to support user schemas + +**Example: Calling validateData() during sync** + +```typescript +export function myCollectionOptions(config) { + return { + sync: { + sync: ({ write, begin, commit, collection }) => { + // Read from your storage + const storageData = await fetchFromStorage() + + begin() + for (const row of storageData) { + // Parse from storage format + const parsed = parseFromStorageFormat(row) + + // Validate and transform using user's schema (if provided) + const validated = collection.validateData(parsed, 'insert') + + // Write TOutput to collection + write({ type: 'insert', value: validated }) + } + commit() + } + }, + + onInsert: async ({ transaction }) => { + const items = transaction.mutations.map(m => m.modified) // TOutput + + // Serialize for your storage + const serialized = items.map(item => serializeForStorage(item)) + + // Write to storage + await storage.bulkWrite(serialized) + } + } +} +``` + +For comprehensive documentation on schemas from a user perspective, see the [Schemas guide](./schemas.md). +``` + +--- + +### Phase 3: Create the New Schemas Guide + +**File:** `docs/guides/schemas.md` + +**Structure (detailed outline):** + +#### 1. Introduction (5 min read) +- What schemas do in TanStack DB +- Why use them (type safety, validation, transformations) +- StandardSchema compatibility + +#### 2. Core Concepts: TInput vs TOutput (5 min) +- Clear explanation with diagrams +- Data flow through the system +- Where validation happens + +```typescript +const schema = z.object({ + created_at: z.string().transform(val => new Date(val)) + // TInput: string (what users provide) + // TOutput: Date (what's stored and returned) +}) +``` + +#### 3. 
Data Flow Diagram +Visual showing the journey from user input β†’ validation β†’ storage β†’ queries + +``` +User Input (TInput) + ↓ +collection.insert() + ↓ +Schema Validation & Transformation + ↓ +Collection Storage (TOutput) + ↓ +Queries & Reads (TOutput) +``` + +#### 4. Transformation Examples (10 min) + +**4.1 Type Conversions** +- String β†’ Date +- Number β†’ Enum +- JSON string β†’ Object +- Computed fields + +**4.2 Default Values** +- `.default()` with literals +- `.default()` with functions +- Arrays and objects + +**4.3 Validation & Constraints** +- `.min()`, `.max()`, `.email()`, etc. +- Custom validation +- Error messages + +#### 5. Handling Updates (10 min) + +**The Challenge:** Existing data is TOutput, but users provide TInput + +**Solution:** Union types + +```typescript +const schema = z.object({ + created_at: z.union([ + z.string(), // New input + z.date() // Existing data + ]).transform(val => typeof val === 'string' ? new Date(val) : val) +}) +``` + +#### 6. For App Developers (15 min) + +**6.1 When to Use Schemas** +- Always recommended +- Benefits list + +**6.2 Common Patterns** +- Date handling (with and without unions) +- Timestamps with defaults +- Type-safe enums +- Nullable/optional fields +- Arrays with defaults + +**6.3 Best Practices** +- Prefer rich TOutput types +- Use unions for updates +- Keep transformations simple +- Consider performance + +**6.4 Complete Example** +Full working todo app with schema + +#### 7. 
For Integration Authors (10 min) + +**7.1 Understanding the Boundary** +- Schema validation vs integration parsing +- When each happens +- How they work together + +**7.2 Calling validateData()** +```typescript +// When syncing TO collection +const validated = collection.validateData(row, 'insert') +write({ type: 'insert', value: validated }) +``` + +**7.3 Serializing in Handlers** +```typescript +// When persisting FROM collection +onInsert: async ({ transaction }) => { + const item = transaction.mutations[0].modified // TOutput + const serialized = serializeForStorage(item) + await storage.write(serialized) +} +``` + +**7.4 Best Practices** +- Always call validateData() during sync +- Don't constrain user schemas to storage types +- Handle validation errors gracefully + +#### 8. Error Handling (5 min) +- SchemaValidationError structure +- Catching and displaying errors +- Handling invalid sync data + +#### 9. Performance Considerations (3 min) +- When validation happens +- Cost of complex transformations +- Optimization tips + +#### 10. Complete Working Examples (10 min) +- Todo app with rich types +- E-commerce product with computed fields +- Multi-collection transaction + +#### 11. Related Topics +- Links to mutations.md, error-handling.md, collection-options-creator.md +- Link to StandardSchema spec + +--- + +## Implementation Order + +1. βœ… **Create schemas.md** (the comprehensive guide) +2. βœ… **Update overview.md** (expand collection schemas section, add example) +3. βœ… **Update mutations.md** (add schema validation section) +4. βœ… **Update error-handling.md** (add "when schema validation occurs") +5. βœ… **Update collection-options-creator.md** (add "Integration Parsing vs Schema Validation" section) +6. ⏭️ **Update navigation** (add schemas.md to docs navigation/sidebar) +7. ⏭️ **Review examples** (ensure they follow best practices) +8. 
⏭️ **Get feedback** (from integration authors and community) + +--- + +## Success Criteria + +After implementation, developers should be able to: + +### App Developers +1. βœ… Explain TInput vs TOutput +2. βœ… Use `.transform()` to convert types +3. βœ… Apply default values with `.default()` +4. βœ… Handle both new input and existing data in update schemas +5. βœ… Understand when schema validation happens +6. βœ… Debug SchemaValidationError + +### Integration Authors +7. βœ… Distinguish between integration parsing and schema validation +8. βœ… Know when to call `collection.validateData()` +9. βœ… Understand where to serialize/deserialize +10. βœ… Avoid constraining user schemas to storage types + +--- + +## Key Insights from Investigation + +1. **Two Mechanisms Exist:** Integration parsing (storage format) and schema validation (user input) serve different purposes + +2. **Real-World Usage:** Examples show: + - Union types for handling both string and Date + - Integration-specific parsing (Electric `parser`, TrailBase `parse/serialize`) + - Schemas for validation and defaults + +3. **Current Gap:** No documentation explains: + - TInput vs TOutput concept + - How the two mechanisms relate + - When to use which approach + - Best practices for either audience + +4. **Documentation Spread:** Schema-related content currently in 4 docs but none comprehensive + +5. **PowerSync Confusion:** Their question proves the need - they didn't understand: + - TOutput should be rich types + - Integration layer handles serialization + - Schema validation vs sync parsing + +--- + +## Next Steps + +1. Get approval on this refined plan +2. Implement schemas.md with full content +3. Make targeted updates to existing docs +4. Add navigation links +5. Review with integration authors (Electric, PowerSync, TrailBase teams) +6. 
Collect feedback and iterate + +This approach provides: +- βœ… Comprehensive coverage in one place (schemas.md) +- βœ… Targeted updates to existing docs (not overwhelming) +- βœ… Clear distinction between app dev and integration author concerns +- βœ… Addresses PowerSync-type confusion directly +- βœ… Builds on existing example patterns From 486bd8a7ce0bc82aaff934f52d210402a85dab2a Mon Sep 17 00:00:00 2001 From: Claude Date: Fri, 31 Oct 2025 15:59:46 +0000 Subject: [PATCH 4/9] Add comprehensive schemas guide for TanStack DB MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit New guide (docs/guides/schemas.md) covering: - Introduction with validation-first example - Core concepts: TInput vs TOutput with data flow diagram - Validation patterns (types, strings, numbers, enums, arrays, custom) - Transformation patterns (Date conversion, JSON, computed fields) - Default values (literals, functions, complex) - Handling updates with union types pattern - Error handling with SchemaValidationError - Best practices (with performance callout) - Two complete working examples (Todo app, E-commerce) - Brief integration authors section linking to collection-options-creator - Related topics links This addresses the documentation gap identified in the PowerSync question about TInput/TOutput and provides clear guidance for both app developers and integration authors on schema validation and type transformations. 
~850 lines, 12 sections, 30+ code examples πŸ€– Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- docs/guides/schemas.md | 1317 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 1317 insertions(+) create mode 100644 docs/guides/schemas.md diff --git a/docs/guides/schemas.md b/docs/guides/schemas.md new file mode 100644 index 000000000..d7c791e1f --- /dev/null +++ b/docs/guides/schemas.md @@ -0,0 +1,1317 @@ +--- +title: Schemas +id: schemas +--- + +# Schema Validation and Type Transformations + +TanStack DB uses schemas to ensure your data is valid and type-safe throughout your application. + +## What You'll Learn + +This guide covers: +- How schema validation works in TanStack DB +- Understanding TInput and TOutput types +- Common patterns: validation, transformations, and defaults +- Error handling and best practices + +## Quick Example + +Schemas catch invalid data before it enters your collection: + +```typescript +import { z } from 'zod' +import { createCollection } from '@tanstack/react-db' +import { queryCollectionOptions } from '@tanstack/query-db-collection' + +const todoSchema = z.object({ + id: z.string(), + text: z.string().min(1, "Text is required"), + completed: z.boolean(), + priority: z.number().min(0).max(5) +}) + +const collection = createCollection( + queryCollectionOptions({ + schema: todoSchema, + queryKey: ['todos'], + queryFn: async () => api.todos.getAll(), + getKey: (item) => item.id, + // ... 
+ }) +) + +// Invalid data throws SchemaValidationError +collection.insert({ + id: "1", + text: "", // ❌ Too short + completed: "yes", // ❌ Wrong type + priority: 10 // ❌ Out of range +}) +// Error: Validation failed with 3 issues + +// Valid data works +collection.insert({ + id: "1", + text: "Buy groceries", // βœ… + completed: false, // βœ… + priority: 2 // βœ… +}) +``` + +Schemas also enable advanced features like type transformations and defaults: + +```typescript +const todoSchema = z.object({ + id: z.string(), + text: z.string().min(1), + completed: z.boolean().default(false), // Auto-fill missing values + created_at: z.string().transform(val => new Date(val)) // Convert types +}) + +collection.insert({ + id: "1", + text: "Buy groceries", + created_at: "2024-01-01T00:00:00Z" // String in + // completed auto-filled with false +}) + +const todo = collection.get("1") +console.log(todo.created_at.getFullYear()) // Date object out! +``` + +## Supported Schema Libraries + +TanStack DB supports any [StandardSchema](https://standardschema.dev) compatible library: +- [Zod](https://zod.dev) +- [Valibot](https://valibot.dev) +- [ArkType](https://arktype.io) +- [Effect Schema](https://effect.website/docs/schema/introduction/) + +Examples in this guide use Zod, but patterns apply to all libraries. + +--- + +## Core Concepts: TInput vs TOutput + +Understanding TInput and TOutput is key to working effectively with schemas in TanStack DB. + +### What are TInput and TOutput? 
+ +When you define a schema with transformations, it has two types: + +- **TInput**: The type users provide when calling `insert()` or `update()` +- **TOutput**: The type stored in the collection and returned from queries + +```typescript +const todoSchema = z.object({ + id: z.string(), + text: z.string(), + created_at: z.string().transform(val => new Date(val)) +}) + +// TInput type: { id: string, text: string, created_at: string } +// TOutput type: { id: string, text: string, created_at: Date } +``` + +The schema acts as a **boundary** that transforms TInput β†’ TOutput. + +### Why This Matters + +**All data in your collection is TOutput:** +- Data stored in the collection +- Data returned from queries +- Data in `PendingMutation.modified` +- Data in mutation handlers + +```typescript +const collection = createCollection({ + schema: todoSchema, + onInsert: async ({ transaction }) => { + const item = transaction.mutations[0].modified + + // item is TOutput + console.log(item.created_at instanceof Date) // true + + // If your API needs a string, serialize it + await api.todos.create({ + ...item, + created_at: item.created_at.toISOString() // Date β†’ string + }) + } +}) + +// User provides TInput +collection.insert({ + id: "1", + text: "Task", + created_at: "2024-01-01T00:00:00Z" // string +}) + +// Collection stores and returns TOutput +const todo = collection.get("1") +console.log(todo.created_at.getFullYear()) // It's a Date! 
+``` + +### The Data Flow + +Here's how data flows through the system: + +``` +β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” +β”‚ User Code / API Response β”‚ +β”‚ (TInput format) β”‚ +β”‚ { created_at: "2024-01-01T00:00:00Z" } β”‚ +β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ + β”‚ + ↓ +β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” +β”‚ collection.insert(data) β”‚ +β”‚ or β”‚ +β”‚ collection.validateData(data) β”‚ +β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ + β”‚ + ↓ +β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” +β”‚ Schema Validation & Transformation β”‚ +β”‚ (TInput β†’ TOutput) β”‚ +β”‚ β”‚ +β”‚ 1. Validate types and constraints β”‚ +β”‚ 2. Apply transformations (.transform()) β”‚ +β”‚ 3. Apply defaults (.default()) β”‚ +β”‚ 4. 
Return validated TOutput β”‚ +β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ + β”‚ + ↓ +β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” +β”‚ Collection Storage β”‚ +β”‚ (TOutput format) β”‚ +β”‚ { created_at: Date object } β”‚ +β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ + β”‚ + ↓ +β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” +β”‚ Queries / Reads / Mutation Handlers β”‚ +β”‚ (TOutput format) β”‚ +β”‚ { created_at: Date object } β”‚ +β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ +``` + +**Key points:** +1. Schema validation happens at the **collection boundary** +2. **Everything inside the collection is TOutput** +3. Validation runs during `insert()`, `update()`, and `validateData()` + +--- + +## Validation Patterns + +Schemas provide powerful validation to ensure data quality. 
+ +### Basic Type Validation + +```typescript +const userSchema = z.object({ + id: z.string(), + name: z.string(), + age: z.number(), + email: z.string().email(), + active: z.boolean() +}) + +collection.insert({ + id: "1", + name: "Alice", + age: "25", // ❌ Wrong type - expects number + email: "not-an-email", // ❌ Invalid email format + active: true +}) +// Throws SchemaValidationError +``` + +### String Constraints + +```typescript +const productSchema = z.object({ + id: z.string(), + name: z.string().min(3, "Name must be at least 3 characters"), + sku: z.string().length(8, "SKU must be exactly 8 characters"), + description: z.string().max(500, "Description too long"), + url: z.string().url("Must be a valid URL") +}) +``` + +### Number Constraints + +```typescript +const orderSchema = z.object({ + id: z.string(), + quantity: z.number() + .int("Must be a whole number") + .positive("Must be greater than 0"), + price: z.number() + .min(0.01, "Price must be at least $0.01") + .max(999999.99, "Price too high"), + discount: z.number() + .min(0) + .max(100) +}) +``` + +### Enum Validation + +```typescript +const taskSchema = z.object({ + id: z.string(), + status: z.enum(['todo', 'in-progress', 'done']), + priority: z.enum(['low', 'medium', 'high', 'urgent']) +}) + +collection.insert({ + id: "1", + status: "completed", // ❌ Not in enum + priority: "medium" // βœ… +}) +``` + +### Optional and Nullable Fields + +```typescript +const personSchema = z.object({ + id: z.string(), + name: z.string(), + nickname: z.string().optional(), // Can be omitted + middleName: z.string().nullable(), // Can be null + bio: z.string().optional().nullable() // Can be omitted OR null +}) + +// All valid: +collection.insert({ id: "1", name: "Alice" }) // nickname omitted +collection.insert({ id: "2", name: "Bob", middleName: null }) +collection.insert({ id: "3", name: "Carol", bio: null }) +``` + +### Array Validation + +```typescript +const postSchema = z.object({ + id: z.string(), + title: 
z.string(), + tags: z.array(z.string()).min(1, "At least one tag required"), + likes: z.array(z.number()).max(1000) +}) + +collection.insert({ + id: "1", + title: "My Post", + tags: [], // ❌ Need at least one + likes: [1, 2, 3] +}) +``` + +### Custom Validation + +```typescript +const userSchema = z.object({ + id: z.string(), + username: z.string() + .min(3) + .refine( + (val) => /^[a-zA-Z0-9_]+$/.test(val), + "Username can only contain letters, numbers, and underscores" + ), + password: z.string() + .min(8) + .refine( + (val) => /[A-Z]/.test(val) && /[0-9]/.test(val), + "Password must contain at least one uppercase letter and one number" + ) +}) +``` + +### Cross-Field Validation + +```typescript +const dateRangeSchema = z.object({ + id: z.string(), + start_date: z.string(), + end_date: z.string() +}).refine( + (data) => new Date(data.end_date) > new Date(data.start_date), + "End date must be after start date" +) +``` + +--- + +## Transformation Patterns + +Schemas can transform data as it enters your collection. 
+ +### String to Date + +The most common transformation - convert ISO strings to Date objects: + +```typescript +const eventSchema = z.object({ + id: z.string(), + name: z.string(), + start_time: z.string().transform(val => new Date(val)) +}) + +collection.insert({ + id: "1", + name: "Conference", + start_time: "2024-06-15T10:00:00Z" // TInput: string +}) + +const event = collection.get("1") +console.log(event.start_time.getFullYear()) // TOutput: Date +``` + +### String to Number + +```typescript +const formSchema = z.object({ + id: z.string(), + quantity: z.string().transform(val => parseInt(val, 10)), + price: z.string().transform(val => parseFloat(val)) +}) + +collection.insert({ + id: "1", + quantity: "42", // String from form input + price: "19.99" +}) + +const item = collection.get("1") +console.log(typeof item.quantity) // "number" +``` + +### JSON String to Object + +```typescript +const configSchema = z.object({ + id: z.string(), + settings: z.string().transform(val => JSON.parse(val)) +}) + +collection.insert({ + id: "1", + settings: '{"theme":"dark","notifications":true}' // JSON string +}) + +const config = collection.get("1") +console.log(config.settings.theme) // "dark" (parsed object) +``` + +### Computed Fields + +```typescript +const userSchema = z.object({ + id: z.string(), + first_name: z.string(), + last_name: z.string() +}).transform(data => ({ + ...data, + full_name: `${data.first_name} ${data.last_name}` // Computed +})) + +collection.insert({ + id: "1", + first_name: "John", + last_name: "Doe" +}) + +const user = collection.get("1") +console.log(user.full_name) // "John Doe" +``` + +### String to Enum + +```typescript +const orderSchema = z.object({ + id: z.string(), + status: z.string().transform(val => + val.toUpperCase() as 'PENDING' | 'SHIPPED' | 'DELIVERED' + ) +}) +``` + +### Sanitization + +```typescript +const commentSchema = z.object({ + id: z.string(), + text: z.string().transform(val => val.trim()), // Remove whitespace + 
username: z.string().transform(val => val.toLowerCase()) // Normalize +}) +``` + +### Complex Transformations + +```typescript +const productSchema = z.object({ + id: z.string(), + name: z.string(), + price_cents: z.number() +}).transform(data => ({ + ...data, + price_dollars: data.price_cents / 100, // Add computed field + display_price: `$${(data.price_cents / 100).toFixed(2)}` // Formatted +})) +``` + +--- + +## Default Values + +Schemas can automatically provide default values for missing fields. + +### Literal Defaults + +```typescript +const todoSchema = z.object({ + id: z.string(), + text: z.string(), + completed: z.boolean().default(false), + priority: z.number().default(0), + tags: z.array(z.string()).default([]) +}) + +collection.insert({ + id: "1", + text: "Buy groceries" + // completed, priority, and tags filled automatically +}) + +const todo = collection.get("1") +console.log(todo.completed) // false +console.log(todo.priority) // 0 +console.log(todo.tags) // [] +``` + +### Function Defaults + +Generate defaults dynamically: + +```typescript +const postSchema = z.object({ + id: z.string(), + title: z.string(), + created_at: z.date().default(() => new Date()), + view_count: z.number().default(0), + slug: z.string().default(() => crypto.randomUUID()) +}) + +collection.insert({ + id: "1", + title: "My First Post" + // created_at, view_count, and slug generated automatically +}) +``` + +### Conditional Defaults + +```typescript +const userSchema = z.object({ + id: z.string(), + username: z.string(), + role: z.enum(['user', 'admin']).default('user'), + permissions: z.array(z.string()).default(['read']) +}) +``` + +### Complex Defaults + +```typescript +const eventSchema = z.object({ + id: z.string(), + name: z.string(), + metadata: z.record(z.unknown()).default(() => ({ + created_by: 'system', + version: 1 + })) +}) +``` + +### Combining Defaults with Transformations + +```typescript +const todoSchema = z.object({ + id: z.string(), + text: z.string(), + 
completed: z.boolean().default(false), + created_at: z.string() + .default(() => new Date().toISOString()) + .transform(val => new Date(val)) +}) + +collection.insert({ + id: "1", + text: "Task" + // completed defaults to false + // created_at defaults to current time, then transforms to Date +}) +``` + +--- + +## Handling Updates + +When updating data, your schema needs to handle both new input (TInput) and existing data (already TOutput). + +### The Challenge + +Consider this schema: + +```typescript +const todoSchema = z.object({ + id: z.string(), + created_at: z.string().transform(val => new Date(val)) +}) +``` + +**Problem:** During updates, `created_at` is already a Date (TOutput), but the transform expects a string (TInput). The validation will fail! + +```typescript +// Initial insert works +collection.insert({ + id: "1", + created_at: "2024-01-01T00:00:00Z" // string β†’ Date +}) + +// Update fails! +collection.update("1", (draft) => { + draft.text = "Updated" + // draft.created_at is already a Date, but schema expects string +}) +``` + +### Solution: Union Types + +Accept both the input type and the output type: + +```typescript +const todoSchema = z.object({ + id: z.string(), + text: z.string(), + created_at: z.union([ + z.string(), // Accept string (new input) + z.date() // Accept Date (existing data) + ]).transform(val => + typeof val === 'string' ? 
new Date(val) : val + ) +}) +``` + +Now both inserts and updates work: + +```typescript +// Insert with string (TInput) +collection.insert({ + id: "1", + text: "Task", + created_at: "2024-01-01T00:00:00Z" // string +}) + +// Update works - created_at is already a Date +collection.update("1", (draft) => { + draft.text = "Updated" // created_at stays as Date +}) + +// Can also update with a new string +collection.update("1", (draft) => { + draft.updated_at = "2024-01-02T00:00:00Z" // string β†’ Date +}) +``` + +### Pattern: Union Transform Helper + +For schemas with many date fields, create a helper: + +```typescript +const dateField = z.union([ + z.string(), + z.date() +]).transform(val => typeof val === 'string' ? new Date(val) : val) + +const todoSchema = z.object({ + id: z.string(), + text: z.string(), + created_at: dateField, + updated_at: dateField, + completed_at: dateField.optional() +}) +``` + +### When You Don't Need Unions + +If your schema doesn't have transformations, you don't need unions: + +```typescript +const todoSchema = z.object({ + id: z.string(), + text: z.string(), + completed: z.boolean().default(false), + priority: z.number().default(0) +}) + +// TInput === TOutput (no transformations) +// Updates work fine without unions +``` + +### Optional Fields in Updates + +For partial updates, use `.partial()`: + +```typescript +const insertSchema = z.object({ + id: z.string(), + name: z.string(), + email: z.string().email(), + age: z.number() +}) + +const updateSchema = insertSchema.partial() + +// Now all fields except id are optional for updates +collection.update("1", { name: "Updated Name" }) // OK +``` + +--- + +## Error Handling + +When validation fails, TanStack DB throws a `SchemaValidationError` with detailed information. 
+ +### Basic Error Handling + +```typescript +import { SchemaValidationError } from '@tanstack/db' + +try { + collection.insert({ + id: "1", + email: "not-an-email", + age: -5 + }) +} catch (error) { + if (error instanceof SchemaValidationError) { + console.log(error.type) // 'insert' or 'update' + console.log(error.message) // "Validation failed with 2 issues" + console.log(error.issues) // Array of validation issues + } +} +``` + +### Error Structure + +```typescript +error.issues = [ + { + path: ['email'], + message: 'Invalid email address' + }, + { + path: ['age'], + message: 'Number must be greater than 0' + } +] +``` + +### Displaying Errors in UI + +```typescript +const handleSubmit = async (data: unknown) => { + try { + collection.insert(data) + } catch (error) { + if (error instanceof SchemaValidationError) { + // Show errors by field + error.issues.forEach(issue => { + const fieldName = issue.path?.join('.') || 'unknown' + showFieldError(fieldName, issue.message) + }) + } + } +} +``` + +### React Example + +```tsx +import { SchemaValidationError } from '@tanstack/db' + +function TodoForm() { + const [errors, setErrors] = useState>({}) + + const handleSubmit = (e: React.FormEvent) => { + e.preventDefault() + setErrors({}) + + try { + todoCollection.insert({ + id: crypto.randomUUID(), + text: e.currentTarget.text.value, + priority: parseInt(e.currentTarget.priority.value) + }) + } catch (error) { + if (error instanceof SchemaValidationError) { + const newErrors: Record = {} + error.issues.forEach(issue => { + const field = issue.path?.[0] || 'form' + newErrors[field] = issue.message + }) + setErrors(newErrors) + } + } + } + + return ( +
+    <form onSubmit={handleSubmit}>
+      <input name="text" placeholder="Todo text" />
+      {errors.text && <span className="error">{errors.text}</span>}
+      <input name="priority" type="number" defaultValue={0} />
+      {errors.priority && <span className="error">{errors.priority}</span>}
+      <button type="submit">Add Todo</button>
+    </form>
+ ) +} +``` + +### Handling Sync Validation Errors + +When syncing data into your collection, handle validation errors gracefully: + +```typescript +sync: { + sync: ({ write, begin, commit, collection }) => { + const data = await fetchFromAPI() + + begin() + for (const item of data) { + try { + const validated = collection.validateData(item, 'insert') + write({ type: 'insert', value: validated }) + } catch (error) { + if (error instanceof SchemaValidationError) { + // Log but don't stop sync + console.error(`Invalid data from server:`, item, error.issues) + continue // Skip this item + } + throw error // Re-throw other errors + } + } + commit() + } +} +``` + +### Safe Parsing (Zod) + +For cases where you want a result type instead of throwing: + +```typescript +const result = todoSchema.safeParse(data) + +if (result.success) { + collection.insert(result.data) +} else { + console.error(result.error.issues) +} +``` + +--- + +## Best Practices + +### When to Use Schemas + +βœ… **Use schemas when you want:** +- Runtime validation of user input +- Type transformations (string β†’ Date, etc.) +- Automatic default values +- Better TypeScript inference +- Validation error messages + +❌ **You might not need schemas if:** +- Your data is already validated (e.g., from a type-safe backend) +- You don't need transformations or defaults +- Performance is critical and validation would be a bottleneck + +### Keep Transformations Simple + +> **Performance Note:** Schema validation is synchronous and runs on every optimistic mutation. For high-frequency updates, keep transformations simple. 
+ +```typescript +// ❌ Avoid expensive operations +const schema = z.object({ + data: z.string().transform(val => { + // Heavy computation on every mutation + return expensiveParsingOperation(val) + }) +}) + +// βœ… Better: Validate only, process elsewhere +const schema = z.object({ + data: z.string() // Simple validation +}) + +// Process in component or mutation handler when needed +const processedData = expensiveParsingOperation(todo.data) +``` + +### Use Union Types for Updates + +Always use union types when transforming to different output types: + +```typescript +// βœ… Good: Handles both input and existing data +const schema = z.object({ + created_at: z.union([z.string(), z.date()]) + .transform(val => typeof val === 'string' ? new Date(val) : val) +}) + +// ❌ Bad: Will fail on updates +const schema = z.object({ + created_at: z.string().transform(val => new Date(val)) +}) +``` + +### Validate at the Boundary + +Let the collection schema handle validation. Don't duplicate validation logic: + +```typescript +// ❌ Avoid: Duplicate validation +function addTodo(text: string) { + if (!text || text.length < 3) { + throw new Error("Text too short") + } + todoCollection.insert({ id: "1", text }) +} + +// βœ… Better: Let schema handle it +const todoSchema = z.object({ + id: z.string(), + text: z.string().min(3, "Text must be at least 3 characters") +}) +``` + +### Type Inference + +Let TypeScript infer types from your schema: + +```typescript +const todoSchema = z.object({ + id: z.string(), + text: z.string(), + completed: z.boolean() +}) + +type Todo = z.infer // Inferred type + +// βœ… Use the inferred type +const collection = createCollection( + queryCollectionOptions({ + schema: todoSchema, + // TypeScript knows the item type automatically + getKey: (item) => item.id // item is Todo + }) +) +``` + +### Custom Error Messages + +Provide helpful error messages for users: + +```typescript +const userSchema = z.object({ + username: z.string() + .min(3, "Username must be 
at least 3 characters") + .max(20, "Username is too long (max 20 characters)") + .regex(/^[a-zA-Z0-9_]+$/, "Username can only contain letters, numbers, and underscores"), + email: z.string().email("Please enter a valid email address"), + age: z.number() + .int("Age must be a whole number") + .min(13, "You must be at least 13 years old") +}) +``` + +### Schema Organization + +For large schemas, organize by domain: + +```typescript +// schemas/user.ts +export const userSchema = z.object({ + id: z.string(), + username: z.string().min(3), + email: z.string().email() +}) + +// schemas/todo.ts +export const todoSchema = z.object({ + id: z.string(), + text: z.string().min(1), + user_id: z.string() +}) + +// collections/todos.ts +import { todoSchema } from '../schemas/todo' + +export const todoCollection = createCollection( + queryCollectionOptions({ + schema: todoSchema, + // ... + }) +) +``` + +--- + +## Full-Context Examples + +### Example 1: Todo App with Rich Types + +A complete todo application demonstrating validation, transformations, and defaults: + +```typescript +import { z } from 'zod' +import { createCollection } from '@tanstack/react-db' +import { queryCollectionOptions } from '@tanstack/query-db-collection' + +// Schema with validation, transformations, and defaults +const todoSchema = z.object({ + id: z.string(), + text: z.string().min(1, "Todo text cannot be empty"), + completed: z.boolean().default(false), + priority: z.enum(['low', 'medium', 'high']).default('medium'), + due_date: z.union([ + z.string(), + z.date() + ]).transform(val => typeof val === 'string' ? new Date(val) : val).optional(), + created_at: z.union([ + z.string(), + z.date() + ]).transform(val => typeof val === 'string' ? 
new Date(val) : val) + .default(() => new Date()), + tags: z.array(z.string()).default([]) +}) + +type Todo = z.infer + +// Collection setup +const todoCollection = createCollection( + queryCollectionOptions({ + queryKey: ['todos'], + queryFn: async () => { + const response = await fetch('/api/todos') + const todos = await response.json() + // API returns ISO strings for dates + return todos + }, + getKey: (item) => item.id, + schema: todoSchema, + queryClient, + + onInsert: async ({ transaction }) => { + const todo = transaction.mutations[0].modified + + // Serialize dates for API + await fetch('/api/todos', { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ + ...todo, + due_date: todo.due_date?.toISOString(), + created_at: todo.created_at.toISOString() + }) + }) + }, + + onUpdate: async ({ transaction }) => { + await Promise.all( + transaction.mutations.map(async (mutation) => { + const { original, changes } = mutation + + // Serialize any date fields in changes + const serialized = { + ...changes, + due_date: changes.due_date instanceof Date + ? 
changes.due_date.toISOString() + : changes.due_date + } + + await fetch(`/api/todos/${original.id}`, { + method: 'PATCH', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify(serialized) + }) + }) + ) + }, + + onDelete: async ({ transaction }) => { + await Promise.all( + transaction.mutations.map(async (mutation) => { + await fetch(`/api/todos/${mutation.original.id}`, { + method: 'DELETE' + }) + }) + ) + } + }) +) + +// Component usage +function TodoApp() { + const { data: todos } = useLiveQuery(q => + q.from({ todo: todoCollection }) + .where(({ todo }) => !todo.completed) + .orderBy(({ todo }) => todo.created_at, 'desc') + ) + + const [errors, setErrors] = useState>({}) + + const addTodo = (text: string, priority: 'low' | 'medium' | 'high') => { + try { + todoCollection.insert({ + id: crypto.randomUUID(), + text, + priority, + due_date: "2024-12-31T23:59:59Z" + // completed, created_at, tags filled automatically by defaults + }) + setErrors({}) + } catch (error) { + if (error instanceof SchemaValidationError) { + const newErrors: Record = {} + error.issues.forEach(issue => { + const field = issue.path?.[0] || 'form' + newErrors[field] = issue.message + }) + setErrors(newErrors) + } + } + } + + const toggleComplete = (todo: Todo) => { + todoCollection.update(todo.id, (draft) => { + draft.completed = !draft.completed + }) + } + + return ( +
+    <div>
+      <h1>Todos</h1>
+      <form
+        onSubmit={(e) => {
+          e.preventDefault()
+          addTodo(e.currentTarget.text.value, e.currentTarget.priority.value)
+        }}
+      >
+        <input name="text" placeholder="What needs doing?" />
+        {errors.text && <span className="error">{errors.text}</span>}
+        <select name="priority">
+          <option value="low">low</option>
+          <option value="medium">medium</option>
+          <option value="high">high</option>
+        </select>
+        <button type="submit">Add Todo</button>
+      </form>
+      <ul>
+        {todos?.map(todo => (
+          <li key={todo.id}>
+            <input
+              type="checkbox"
+              checked={todo.completed}
+              onChange={() => toggleComplete(todo)}
+            />
+            <span>{todo.text}</span>
+            <span>Priority: {todo.priority}</span>
+            {todo.due_date && (
+              <span>Due: {todo.due_date.toLocaleDateString()}</span>
+            )}
+            <span>Created: {todo.created_at.toLocaleDateString()}</span>
+          </li>
+        ))}
+      </ul>
+    </div>
+ ) +} +``` + +### Example 2: E-commerce Product with Computed Fields + +```typescript +import { z } from 'zod' + +// Schema with computed fields and transformations +const productSchema = z.object({ + id: z.string(), + name: z.string().min(3, "Product name must be at least 3 characters"), + description: z.string().max(500, "Description too long"), + base_price: z.number().positive("Price must be positive"), + tax_rate: z.number().min(0).max(1).default(0.1), + discount_percent: z.number().min(0).max(100).default(0), + stock: z.number().int().min(0).default(0), + category: z.enum(['electronics', 'clothing', 'food', 'other']), + tags: z.array(z.string()).default([]), + created_at: z.union([z.string(), z.date()]) + .transform(val => typeof val === 'string' ? new Date(val) : val) + .default(() => new Date()) +}).transform(data => ({ + ...data, + // Computed fields + final_price: data.base_price * (1 + data.tax_rate) * (1 - data.discount_percent / 100), + in_stock: data.stock > 0, + display_price: `$${(data.base_price * (1 + data.tax_rate) * (1 - data.discount_percent / 100)).toFixed(2)}` +})) + +type Product = z.infer + +const productCollection = createCollection( + queryCollectionOptions({ + queryKey: ['products'], + queryFn: async () => api.products.getAll(), + getKey: (item) => item.id, + schema: productSchema, + queryClient, + + onInsert: async ({ transaction }) => { + const product = transaction.mutations[0].modified + + // API only needs base fields, not computed ones + await api.products.create({ + name: product.name, + description: product.description, + base_price: product.base_price, + tax_rate: product.tax_rate, + discount_percent: product.discount_percent, + stock: product.stock, + category: product.category, + tags: product.tags + }) + } + }) +) + +// Usage +function ProductList() { + const { data: products } = useLiveQuery(q => + q.from({ product: productCollection }) + .where(({ product }) => product.in_stock) // Use computed field + .orderBy(({ product 
}) => product.final_price, 'asc') + ) + + const addProduct = () => { + productCollection.insert({ + id: crypto.randomUUID(), + name: "Wireless Mouse", + description: "Ergonomic wireless mouse", + base_price: 29.99, + discount_percent: 10, + category: "electronics", + stock: 50 + // tax_rate, tags, created_at filled by defaults + // final_price, in_stock, display_price computed automatically + }) + } + + return ( +
+    <div>
+      {products?.map(product => (
+        <div key={product.id}>
+          <h2>{product.name}</h2>
+          <p>{product.description}</p>
+          <p>Price: {product.display_price}</p>
+          <p>Stock: {product.in_stock ? `${product.stock} available` : 'Out of stock'}</p>
+          <p>Category: {product.category}</p>
+        </div>
+      ))}
+    </div>
+ ) +} +``` + +--- + +## For Integration Authors + +If you're creating a custom collection options creator (like `electricCollectionOptions` or `trailbaseCollectionOptions`), you need to understand how schemas interact with your sync layer. + +### Two Type Conversion Mechanisms + +There are **two separate but complementary** type conversion mechanisms: + +1. **Your integration's parsing** (storage format ↔ in-memory format) + - Example: Unix timestamp β†’ Date, WKB β†’ GeoJSON + - Layer: Sync (during `write()`) + - Your responsibility as integration author + +2. **User schemas** (TInput β†’ TOutput for mutations) + - Example: ISO string β†’ Date, validation, defaults + - Layer: Mutations (during `insert()`/`update()`) + - User's choice + +### How They Work Together + +```typescript +// 1. User defines schema +const todoSchema = z.object({ + created_at: z.string().transform(val => new Date(val)) +}) + +// 2. Your integration handles storage format +export function myCollectionOptions(config) { + return { + // Parse from storage format (e.g., Unix timestamp β†’ Date) + sync: { + sync: ({ write, collection }) => { + const storageRow = { id: "1", created_at: 1704067200 } // Unix timestamp + + // Your parsing layer + const parsed = { + ...storageRow, + created_at: new Date(storageRow.created_at * 1000) // β†’ Date + } + + // Validate with user's schema (if provided) + const validated = collection.validateData(parsed, 'insert') + + // Write TOutput to collection + write({ type: 'insert', value: validated }) + } + }, + + // Serialize for storage format + onInsert: async ({ transaction }) => { + const item = transaction.mutations[0].modified // TOutput (Date) + + // Your serialization layer + const serialized = { + ...item, + created_at: Math.floor(item.created_at.valueOf() / 1000) // Date β†’ Unix + } + + await storage.write(serialized) + } + } +} +``` + +### Best Practices for Integration Authors + +1. 
**Always call `collection.validateData()`** when syncing data into the collection +2. **Don't constrain user schemas** to match your storage types - let users define rich TOutput +3. **Handle serialization in mutation handlers** when persisting to your storage +4. **Document your storage formats** so users know what to expect + +### Complete Example + +See the [Collection Options Creator Guide](./collection-options-creator.md) for comprehensive documentation on creating integrations, including detailed guidance on handling schemas. + +--- + +## Related Topics + +- **[Mutations Guide](./mutations.md)** - Learn about optimistic mutations and how schemas validate mutation data +- **[Error Handling Guide](./error-handling.md)** - Comprehensive guide to handling `SchemaValidationError` and other errors +- **[Collection Options Creator Guide](./collection-options-creator.md)** - For integration authors: creating custom collection types with schema support +- **[StandardSchema Specification](https://standardschema.dev)** - Full specification for StandardSchema v1 From f94546658fea55f603b1abb5920e03044cb71a00 Mon Sep 17 00:00:00 2001 From: Kyle Mathews Date: Fri, 31 Oct 2025 10:27:24 -0600 Subject: [PATCH 5/9] refactor: revise schemas.md based on feedback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Address review feedback to make the guide more focused and practical: - Add clarification that schemas only validate client changes, not server data - Remove "Handling Sync Validation Errors" section - Fix QueryFn example to show manual parsing is required for API responses - Rename "Handling Updates" to "Handling Timestamps" with better focus on common patterns - Remove "Safe Parsing (Zod)" section - Remove "When to Use Schemas" from Best Practices - Remove "Schema Organization" from Best Practices - Replace lengthy "For Integration Authors" section with brief link to collection-options-creator.md πŸ€– Generated with [Claude 
Code](https://claude.com/claude-code) Co-Authored-By: Claude --- docs/guides/schemas.md | 286 ++++++----------------------------------- 1 file changed, 41 insertions(+), 245 deletions(-) diff --git a/docs/guides/schemas.md b/docs/guides/schemas.md index d7c791e1f..47646d0c1 100644 --- a/docs/guides/schemas.md +++ b/docs/guides/schemas.md @@ -96,6 +96,8 @@ Examples in this guide use Zod, but patterns apply to all libraries. Understanding TInput and TOutput is key to working effectively with schemas in TanStack DB. +> **Important:** Schemas validate **client changes only** - data you insert or update via `collection.insert()` and `collection.update()`. They do not automatically validate data loaded from your server or sync layer. If you need to validate server data, you must do so explicitly in your integration layer. + ### What are TInput and TOutput? When you define a schema with transformations, it has two types: @@ -557,128 +559,73 @@ collection.insert({ --- -## Handling Updates +## Handling Timestamps -When updating data, your schema needs to handle both new input (TInput) and existing data (already TOutput). +When working with timestamps, you typically want automatic creation dates rather than transforming user input. -### The Challenge +### Use Defaults for Timestamps -Consider this schema: +For `created_at` and `updated_at` fields, use defaults to automatically generate timestamps: ```typescript const todoSchema = z.object({ id: z.string(), - created_at: z.string().transform(val => new Date(val)) + text: z.string(), + completed: z.boolean().default(false), + created_at: z.date().default(() => new Date()), + updated_at: z.date().default(() => new Date()) }) -``` -**Problem:** During updates, `created_at` is already a Date (TOutput), but the transform expects a string (TInput). The validation will fail! 
- -```typescript -// Initial insert works +// Timestamps generated automatically collection.insert({ id: "1", - created_at: "2024-01-01T00:00:00Z" // string β†’ Date + text: "Buy groceries" + // created_at and updated_at filled automatically }) -// Update fails! +// Update timestamps collection.update("1", (draft) => { - draft.text = "Updated" - // draft.created_at is already a Date, but schema expects string + draft.text = "Buy groceries and milk" + draft.updated_at = new Date() }) ``` -### Solution: Union Types +### When You Need Union Types -Accept both the input type and the output type: +If you're accepting date input from external sources (forms, APIs), you may need to accept both strings and Date objects: ```typescript -const todoSchema = z.object({ +const eventSchema = z.object({ id: z.string(), - text: z.string(), - created_at: z.union([ - z.string(), // Accept string (new input) - z.date() // Accept Date (existing data) + name: z.string(), + scheduled_for: z.union([ + z.string(), // Accept ISO string from form input + z.date() // Accept Date from existing data or programmatic input ]).transform(val => typeof val === 'string' ? 
new Date(val) : val ) }) -``` -Now both inserts and updates work: - -```typescript -// Insert with string (TInput) +// Works with string input collection.insert({ id: "1", - text: "Task", - created_at: "2024-01-01T00:00:00Z" // string + name: "Meeting", + scheduled_for: "2024-12-31T15:00:00Z" // From form input }) -// Update works - created_at is already a Date -collection.update("1", (draft) => { - draft.text = "Updated" // created_at stays as Date +// Works with Date input +collection.insert({ + id: "2", + name: "Workshop", + scheduled_for: new Date() // Programmatic }) -// Can also update with a new string +// Updates work - scheduled_for is already a Date collection.update("1", (draft) => { - draft.updated_at = "2024-01-02T00:00:00Z" // string β†’ Date -}) -``` - -### Pattern: Union Transform Helper - -For schemas with many date fields, create a helper: - -```typescript -const dateField = z.union([ - z.string(), - z.date() -]).transform(val => typeof val === 'string' ? new Date(val) : val) - -const todoSchema = z.object({ - id: z.string(), - text: z.string(), - created_at: dateField, - updated_at: dateField, - completed_at: dateField.optional() + draft.name = "Updated Meeting" }) ``` -### When You Don't Need Unions - -If your schema doesn't have transformations, you don't need unions: - -```typescript -const todoSchema = z.object({ - id: z.string(), - text: z.string(), - completed: z.boolean().default(false), - priority: z.number().default(0) -}) - -// TInput === TOutput (no transformations) -// Updates work fine without unions -``` - -### Optional Fields in Updates - -For partial updates, use `.partial()`: - -```typescript -const insertSchema = z.object({ - id: z.string(), - name: z.string(), - email: z.string().email(), - age: z.number() -}) - -const updateSchema = insertSchema.partial() - -// Now all fields except id are optional for updates -collection.update("1", { name: "Updated Name" }) // OK -``` - --- ## Error Handling @@ -782,66 +729,10 @@ function 
TodoForm() { } ``` -### Handling Sync Validation Errors - -When syncing data into your collection, handle validation errors gracefully: - -```typescript -sync: { - sync: ({ write, begin, commit, collection }) => { - const data = await fetchFromAPI() - - begin() - for (const item of data) { - try { - const validated = collection.validateData(item, 'insert') - write({ type: 'insert', value: validated }) - } catch (error) { - if (error instanceof SchemaValidationError) { - // Log but don't stop sync - console.error(`Invalid data from server:`, item, error.issues) - continue // Skip this item - } - throw error // Re-throw other errors - } - } - commit() - } -} -``` - -### Safe Parsing (Zod) - -For cases where you want a result type instead of throwing: - -```typescript -const result = todoSchema.safeParse(data) - -if (result.success) { - collection.insert(result.data) -} else { - console.error(result.error.issues) -} -``` - --- ## Best Practices -### When to Use Schemas - -βœ… **Use schemas when you want:** -- Runtime validation of user input -- Type transformations (string β†’ Date, etc.) -- Automatic default values -- Better TypeScript inference -- Validation error messages - -❌ **You might not need schemas if:** -- Your data is already validated (e.g., from a type-safe backend) -- You don't need transformations or defaults -- Performance is critical and validation would be a bottleneck - ### Keep Transformations Simple > **Performance Note:** Schema validation is synchronous and runs on every optimistic mutation. For high-frequency updates, keep transformations simple. 
@@ -941,36 +832,6 @@ const userSchema = z.object({ }) ``` -### Schema Organization - -For large schemas, organize by domain: - -```typescript -// schemas/user.ts -export const userSchema = z.object({ - id: z.string(), - username: z.string().min(3), - email: z.string().email() -}) - -// schemas/todo.ts -export const todoSchema = z.object({ - id: z.string(), - text: z.string().min(1), - user_id: z.string() -}) - -// collections/todos.ts -import { todoSchema } from '../schemas/todo' - -export const todoCollection = createCollection( - queryCollectionOptions({ - schema: todoSchema, - // ... - }) -) -``` - --- ## Full-Context Examples @@ -1011,8 +872,12 @@ const todoCollection = createCollection( queryFn: async () => { const response = await fetch('/api/todos') const todos = await response.json() - // API returns ISO strings for dates - return todos + // Manually parse API responses - schemas only validate client changes + return todos.map((todo: any) => ({ + ...todo, + due_date: todo.due_date ? new Date(todo.due_date) : undefined, + created_at: new Date(todo.created_at) + })) }, getKey: (item) => item.id, schema: todoSchema, @@ -1234,78 +1099,9 @@ function ProductList() { ## For Integration Authors -If you're creating a custom collection options creator (like `electricCollectionOptions` or `trailbaseCollectionOptions`), you need to understand how schemas interact with your sync layer. - -### Two Type Conversion Mechanisms - -There are **two separate but complementary** type conversion mechanisms: - -1. **Your integration's parsing** (storage format ↔ in-memory format) - - Example: Unix timestamp β†’ Date, WKB β†’ GeoJSON - - Layer: Sync (during `write()`) - - Your responsibility as integration author - -2. **User schemas** (TInput β†’ TOutput for mutations) - - Example: ISO string β†’ Date, validation, defaults - - Layer: Mutations (during `insert()`/`update()`) - - User's choice - -### How They Work Together - -```typescript -// 1. 
User defines schema -const todoSchema = z.object({ - created_at: z.string().transform(val => new Date(val)) -}) - -// 2. Your integration handles storage format -export function myCollectionOptions(config) { - return { - // Parse from storage format (e.g., Unix timestamp β†’ Date) - sync: { - sync: ({ write, collection }) => { - const storageRow = { id: "1", created_at: 1704067200 } // Unix timestamp - - // Your parsing layer - const parsed = { - ...storageRow, - created_at: new Date(storageRow.created_at * 1000) // β†’ Date - } - - // Validate with user's schema (if provided) - const validated = collection.validateData(parsed, 'insert') - - // Write TOutput to collection - write({ type: 'insert', value: validated }) - } - }, - - // Serialize for storage format - onInsert: async ({ transaction }) => { - const item = transaction.mutations[0].modified // TOutput (Date) - - // Your serialization layer - const serialized = { - ...item, - created_at: Math.floor(item.created_at.valueOf() / 1000) // Date β†’ Unix - } - - await storage.write(serialized) - } - } -} -``` - -### Best Practices for Integration Authors - -1. **Always call `collection.validateData()`** when syncing data into the collection -2. **Don't constrain user schemas** to match your storage types - let users define rich TOutput -3. **Handle serialization in mutation handlers** when persisting to your storage -4. **Document your storage formats** so users know what to expect - -### Complete Example +If you're building a custom collection (like Electric or TrailBase), you'll need to handle data parsing and serialization between your storage format and the in-memory collection format. This is separate from schema validation, which happens during client mutations. -See the [Collection Options Creator Guide](./collection-options-creator.md) for comprehensive documentation on creating integrations, including detailed guidance on handling schemas. 
+See the [Collection Options Creator Guide](./collection-options-creator.md) for comprehensive documentation on creating custom collection integrations, including how to handle schemas, data parsing, and type transformations. --- From 1cf76e689548f1c19e1b8407ab66d2eaa304175e Mon Sep 17 00:00:00 2001 From: Kyle Mathews Date: Fri, 31 Oct 2025 10:35:54 -0600 Subject: [PATCH 6/9] docs: clarify schema validation and improve queryFn example MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Explicitly mention schemas catch invalid data from optimistic mutations - Show reusing schema with .parse() in queryFn to transform API responses - Remove The Data Flow diagram section (had errors and wasn't useful) πŸ€– Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- docs/guides/schemas.md | 59 +++--------------------------------------- 1 file changed, 3 insertions(+), 56 deletions(-) diff --git a/docs/guides/schemas.md b/docs/guides/schemas.md index 47646d0c1..4d9451741 100644 --- a/docs/guides/schemas.md +++ b/docs/guides/schemas.md @@ -17,7 +17,7 @@ This guide covers: ## Quick Example -Schemas catch invalid data before it enters your collection: +Schemas catch invalid data from optimistic mutations before it enters your collection: ```typescript import { z } from 'zod' @@ -155,55 +155,6 @@ const todo = collection.get("1") console.log(todo.created_at.getFullYear()) // It's a Date! 
``` -### The Data Flow - -Here's how data flows through the system: - -``` -β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” -β”‚ User Code / API Response β”‚ -β”‚ (TInput format) β”‚ -β”‚ { created_at: "2024-01-01T00:00:00Z" } β”‚ -β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ - β”‚ - ↓ -β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” -β”‚ collection.insert(data) β”‚ -β”‚ or β”‚ -β”‚ collection.validateData(data) β”‚ -β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ - β”‚ - ↓ -β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” -β”‚ Schema Validation & Transformation β”‚ -β”‚ (TInput β†’ TOutput) β”‚ -β”‚ β”‚ -β”‚ 1. Validate types and constraints β”‚ -β”‚ 2. Apply transformations (.transform()) β”‚ -β”‚ 3. Apply defaults (.default()) β”‚ -β”‚ 4. 
Return validated TOutput β”‚ -β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ - β”‚ - ↓ -β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” -β”‚ Collection Storage β”‚ -β”‚ (TOutput format) β”‚ -β”‚ { created_at: Date object } β”‚ -β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ - β”‚ - ↓ -β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” -β”‚ Queries / Reads / Mutation Handlers β”‚ -β”‚ (TOutput format) β”‚ -β”‚ { created_at: Date object } β”‚ -β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ -``` - -**Key points:** -1. Schema validation happens at the **collection boundary** -2. **Everything inside the collection is TOutput** -3. Validation runs during `insert()`, `update()`, and `validateData()` - --- ## Validation Patterns @@ -872,12 +823,8 @@ const todoCollection = createCollection( queryFn: async () => { const response = await fetch('/api/todos') const todos = await response.json() - // Manually parse API responses - schemas only validate client changes - return todos.map((todo: any) => ({ - ...todo, - due_date: todo.due_date ? 
new Date(todo.due_date) : undefined, - created_at: new Date(todo.created_at) - })) + // Reuse schema to parse and transform API responses + return todos.map((todo: any) => todoSchema.parse(todo)) }, getKey: (item) => item.id, schema: todoSchema, From a25f2b79d22ea53416600b062b0ce1103efd3fa9 Mon Sep 17 00:00:00 2001 From: Kyle Mathews Date: Fri, 31 Oct 2025 11:03:10 -0600 Subject: [PATCH 7/9] docs: update all docs with schema information MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Updates across documentation to explain schemas and type transformations: **overview.md:** - Expand collection schemas section with comprehensive example - Add list of what schemas do (validation, transformations, defaults, type safety) - Link to new schemas guide **mutations.md:** - Add "Schema Validation in Mutation Handlers" section - Explain that handlers receive TOutput (transformed data) - Show serialization pattern for backends **error-handling.md:** - Add "When schema validation occurs" section - Clarify schemas only validate client mutations, not sync data - Link to schemas guide **collection-options-creator.md:** - Add "Schemas and Type Transformations" section - Explain three approaches: parse/serialize helpers, user handles, automatic serialization - Show examples from TrailBase and Query Collection - Document design principles for integration authors πŸ€– Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- docs/guides/collection-options-creator.md | 119 +++++++++++++++++++++- docs/guides/error-handling.md | 26 +++++ docs/guides/mutations.md | 43 ++++++++ docs/overview.md | 45 +++++++- 4 files changed, 228 insertions(+), 5 deletions(-) diff --git a/docs/guides/collection-options-creator.md b/docs/guides/collection-options-creator.md index 058bb7b2c..d1f4d55c4 100644 --- a/docs/guides/collection-options-creator.md +++ b/docs/guides/collection-options-creator.md @@ -219,7 +219,124 @@ parse: { } ``` 
-### 5. Mutation Handler Patterns +### 5. Schemas and Type Transformations + +When building a custom collection, you need to decide how to handle the relationship between your backend's storage format and the client-side types users work with in their collections. + +#### Two Separate Concerns + +**Backend Format** - The types your storage layer uses (SQLite, Postgres, Firebase, etc.) +- Examples: Unix timestamps, ISO strings, JSON strings, PostGIS geometries + +**Client Format** - The types users work with in their TanStack DB collections +- Examples: Date objects, parsed JSON, GeoJSON + +Schemas in TanStack DB define the **client format** (TInput/TOutput for mutations). How you bridge between backend and client format depends on your integration design. + +#### Approach 1: Integration Provides Parse/Serialize Helpers + +For backends with specific storage formats, provide `parse`/`serialize` options that users configure: + +```typescript +// TrailBase example: User specifies field conversions +export function trailbaseCollectionOptions(config) { + return { + parse: config.parse, // User provides field conversions + serialize: config.serialize, + + onInsert: async ({ transaction }) => { + const serialized = transaction.mutations.map(m => + serializeFields(m.modified, config.serialize) + ) + await config.recordApi.createBulk(serialized) + } + } +} + +// User explicitly configures conversions +const collection = createCollection( + trailbaseCollectionOptions({ + schema: todoSchema, + parse: { + created_at: (ts: number) => new Date(ts * 1000) // Unix β†’ Date + }, + serialize: { + created_at: (date: Date) => Math.floor(date.valueOf() / 1000) // Date β†’ Unix + } + }) +) +``` + +**Benefits:** Explicit control over type conversions. Integration handles applying them consistently. 
+ +#### Approach 2: User Handles Everything in QueryFn/Handlers + +For simple APIs or when users want full control, they handle parsing/serialization themselves: + +```typescript +// Query Collection: User handles all transformations +const collection = createCollection( + queryCollectionOptions({ + schema: todoSchema, + queryFn: async () => { + const response = await fetch('/api/todos') + const todos = await response.json() + // User manually parses to match their schema's TOutput + return todos.map(todo => ({ + ...todo, + created_at: new Date(todo.created_at) // ISO string β†’ Date + })) + }, + onInsert: async ({ transaction }) => { + // User manually serializes for their backend + await fetch('/api/todos', { + method: 'POST', + body: JSON.stringify({ + ...transaction.mutations[0].modified, + created_at: transaction.mutations[0].modified.created_at.toISOString() // Date β†’ ISO string + }) + }) + } + }) +) +``` + +**Benefits:** Maximum flexibility, no abstraction overhead. Users see exactly what's happening. + +#### Approach 3: Automatic Serialization in Handlers + +If your backend has well-defined types, you can automatically serialize in mutation handlers: + +```typescript +export function myCollectionOptions(config) { + return { + onInsert: async ({ transaction }) => { + // Automatically serialize known types for your backend + const serialized = transaction.mutations.map(m => ({ + ...m.modified, + // Date objects β†’ Unix timestamps for your backend + created_at: m.modified.created_at instanceof Date + ? Math.floor(m.modified.created_at.valueOf() / 1000) + : m.modified.created_at + })) + await backend.insert(serialized) + } + } +} +``` + +**Benefits:** Least configuration for users. Integration handles backend format automatically. + +#### Key Design Principles + +1. **Schemas validate client mutations only** - They don't affect how backend data is parsed during sync +2. 
**TOutput is the application-facing type** - This is what users work with in their app +3. **Choose your approach based on backend constraints** - Fixed types β†’ automatic serialization; varying types β†’ user configuration +4. **Document your backend format clearly** - Explain what types your storage uses and how to handle them + +For more on schemas from a user perspective, see the [Schemas guide](./schemas.md). + +### 6. Mutation Handler Patterns There are two distinct patterns for handling mutations in collection options creators: diff --git a/docs/guides/error-handling.md b/docs/guides/error-handling.md index 7661c240b..fb2eaeb2f 100644 --- a/docs/guides/error-handling.md +++ b/docs/guides/error-handling.md @@ -45,6 +45,32 @@ The error includes: - `issues`: Array of validation issues with messages and paths - `message`: A formatted error message listing all issues +**When schema validation occurs:** + +Schema validation happens only for **client mutations** - when you explicitly insert or update data: + +1. **During inserts** - When `collection.insert()` is called +2. **During updates** - When `collection.update()` is called + +Schemas do **not** validate data coming from your server or sync layer. That data is assumed to already be valid. + +```typescript +const schema = z.object({ + id: z.string(), + created_at: z.string().transform(val => new Date(val)) + // TInput: string, TOutput: Date +}) + +// Validation happens here βœ“ +collection.insert({ + id: "1", + created_at: "2024-01-01" // TInput: string +}) +// If successful, stores: { created_at: Date } // TOutput: Date +``` + +For more details on schema validation and type transformations, see the [Schemas guide](./schemas.md). + ## Query Collection Error Tracking Query collections provide enhanced error tracking utilities through the `utils` object. 
These methods expose error state information and provide recovery mechanisms for failed queries: diff --git a/docs/guides/mutations.md b/docs/guides/mutations.md index 24e8aba92..44d53c50c 100644 --- a/docs/guides/mutations.md +++ b/docs/guides/mutations.md @@ -447,6 +447,49 @@ const todoCollection = createCollection({ }) ``` +### Schema Validation in Mutation Handlers + +When a schema is configured for a collection, TanStack DB automatically validates and transforms data during mutations. The mutation handlers receive the **transformed data** (TOutput), not the raw input. + +```typescript +const todoSchema = z.object({ + id: z.string(), + text: z.string(), + created_at: z.string().transform(val => new Date(val)) // TInput: string, TOutput: Date +}) + +const collection = createCollection({ + schema: todoSchema, + onInsert: async ({ transaction }) => { + const item = transaction.mutations[0].modified + + // item.created_at is already a Date object (TOutput) + console.log(item.created_at instanceof Date) // true + + // If your API needs a string, serialize it + await api.todos.create({ + ...item, + created_at: item.created_at.toISOString() // Date β†’ string + }) + } +}) + +// User provides string (TInput) +collection.insert({ + id: "1", + text: "Task", + created_at: "2024-01-01T00:00:00Z" +}) +``` + +**Key points:** +- Schema validation happens **before** mutation handlers are called +- Handlers receive **TOutput** (transformed data) +- If your backend needs a different format, serialize in the handler +- Schema validation errors throw `SchemaValidationError` before handlers run + +For comprehensive documentation on schema validation and transformations, see the [Schemas guide](./schemas.md). + ## Creating Custom Actions For more complex mutation patterns, use `createOptimisticAction` to create custom actions with full control over the mutation lifecycle. 
diff --git a/docs/overview.md b/docs/overview.md index b5597a335..23bb9a490 100644 --- a/docs/overview.md +++ b/docs/overview.md @@ -145,12 +145,49 @@ You can also use: All collections optionally (though strongly recommended) support adding a `schema`. -If provided, this must be a [Standard Schema](https://standardschema.dev) compatible schema instance, such as a [Zod](https://zod.dev) or [Effect](https://effect.website/docs/schema/introduction/) schema. +If provided, this must be a [Standard Schema](https://standardschema.dev) compatible schema instance, such as [Zod](https://zod.dev), [Valibot](https://valibot.dev), [ArkType](https://arktype.io), or [Effect](https://effect.website/docs/schema/introduction/). + +**What schemas do:** + +1. **Runtime validation** - Ensures data meets your constraints before entering the collection +2. **Type transformations** - Convert input types to rich output types (e.g., string β†’ Date) +3. **Default values** - Automatically populate missing fields +4. **Type safety** - Infer TypeScript types from your schema + +**Example:** +```typescript +const todoSchema = z.object({ + id: z.string(), + text: z.string(), + completed: z.boolean().default(false), + created_at: z.string().transform(val => new Date(val)), // string β†’ Date + priority: z.number().default(0) +}) + +const collection = createCollection( + queryCollectionOptions({ + schema: todoSchema, + // ... + }) +) + +// Users provide simple inputs +collection.insert({ + id: "1", + text: "Buy groceries", + created_at: "2024-01-01T00:00:00Z" // string + // completed and priority filled automatically +}) + +// Collection stores and returns rich types +const todo = collection.get("1") +console.log(todo.created_at.getFullYear()) // It's a Date! +console.log(todo.completed) // false (default) +``` -The collection will use the schema to do client-side validation of optimistic mutations. +The collection will use the schema for its type inference. 
If you provide a schema, you cannot also pass an explicit type parameter (e.g., `createCollection()`). -The collection will use the schema for its type so if you provide a schema, you can't also pass in an explicit -type (e.g. `createCollection()`). +**Learn more:** See the [Schemas guide](./guides/schemas.md) for comprehensive documentation on schema validation, type transformations, and best practices. #### `QueryCollection` From d43d711f3c1114d028b5644c67d8ecbf2b708312 Mon Sep 17 00:00:00 2001 From: Kyle Mathews Date: Fri, 31 Oct 2025 11:07:27 -0600 Subject: [PATCH 8/9] Remove docs --- powersync-response.md | 451 --------------------- schema-documentation-plan-v2.md | 596 ---------------------------- schema-documentation-proposal.md | 650 ------------------------------- 3 files changed, 1697 deletions(-) delete mode 100644 powersync-response.md delete mode 100644 schema-documentation-plan-v2.md delete mode 100644 schema-documentation-proposal.md diff --git a/powersync-response.md b/powersync-response.md deleted file mode 100644 index 5d1d14815..000000000 --- a/powersync-response.md +++ /dev/null @@ -1,451 +0,0 @@ -# PowerSync Integration: Schema Types and Transformations - -Hi! Thanks for the detailed explanation and the POC. I think there's a misunderstanding about how `TInput`/`TOutput` work in TanStack DB schemas, and the good news is that you **can** support arbitrary TOutput transformations (like `Date` objects) with your PowerSync integration! - -## TL;DR - -**TOutput should always be the rich JavaScript types that users want to work with** (e.g., `Date` objects). Your PowerSync integration layer is responsible for: -- Deserializing SQLite types β†’ TOutput when syncing TO TanStack DB -- Serializing TOutput β†’ SQLite types when persisting mutations FROM TanStack DB - -You don't need to constrain TOutput to match SQLite types. 
- ---- - -## Understanding TInput and TOutput - -Let me clarify the data flow: - -### TInput: What Users Provide for Mutations - -When users call `collection.insert()` or `collection.update()`, they provide data in the **TInput** format. The schema validates and transforms this into **TOutput**. - -```typescript -// Example schema -const schema = z.object({ - id: z.string(), - created_at: z.string().transform(val => new Date(val)) -}) - -// TInput: { id: string, created_at: string } -// TOutput: { id: string, created_at: Date } - -// User inserts with TInput -collection.insert({ - id: "1", - created_at: "2023-01-01T00:00:00.000Z" // string -}) - -// Collection stores as TOutput -collection.get("1") -// Result: { id: "1", created_at: Date } // Date object -``` - -### TOutput: What Gets Stored and Read from Collection - -**All data in the collection is stored as TOutput.** This includes: -1. Data synced via `write()` -2. Data from user mutations (after validation) -3. Data in `PendingMutation.modified` - -Looking at the source code: - -```typescript -// packages/db/src/collection/sync.ts:93 -write: (message: Omit, 'key'>) => void -``` - -The `write()` function expects **TOutput**, not SQLite types. - -```typescript -// packages/db/src/collection/mutations.ts:179 -const mutation: PendingMutation = { - mutationId: crypto.randomUUID(), - original: {}, - modified: validatedData, // This is TOutput - // ... -} -``` - -**PendingMutations store TOutput**, which is the in-memory representation. - ---- - -## How PowerSync Integration Should Work - -Your integration has two responsibilities: - -### 1. 
Syncing FROM SQLite TO TanStack DB - -When reading from SQLite and syncing to the collection, **deserialize to TOutput before calling write()**: - -```typescript -// Your PowerSync sync implementation -const sync: SyncConfig = { - sync: ({ write, begin, commit }) => { - // Read from SQLite - const sqliteRows = db.execute("SELECT * FROM documents") - - begin() - for (const row of sqliteRows) { - // SQLite gives you: { id: "1", created_at: "2023-01-01T00:00:00.000Z" } - - // Option A: If you have a schema, use validateData to transform - const transformed = collection.validateData(row, 'insert') - // Result: { id: "1", created_at: Date } - - write({ - type: 'insert', - value: transformed // TOutput with Date object - }) - - // Option B: If no schema, you need to manually transform - // const transformed = { - // ...row, - // created_at: new Date(row.created_at) - // } - // write({ type: 'insert', value: transformed }) - } - commit() - } -} -``` - -### 2. Persisting FROM TanStack DB TO SQLite - -When handling mutations (onInsert/onUpdate/onDelete), **serialize TOutput to SQLite types**: - -```typescript -const collection = createCollection( - powerSyncCollectionOptions({ - database: db, - table: APP_SCHEMA.props.documents, - schema: z.object({ - id: z.string(), - name: z.string(), - created_at: z.string().transform(val => new Date(val)) - }), - - // In your mutation handler, serialize before writing to SQLite - onInsert: async ({ transaction }) => { - const mutation = transaction.mutations[0] - const item = mutation.modified // This is TOutput: { created_at: Date } - - // Serialize to SQLite types - const sqliteData = { - id: item.id, - name: item.name, - created_at: item.created_at.toISOString() // Date β†’ string - } - - // Write to SQLite - await db.execute( - "INSERT INTO documents (id, name, created_at) VALUES (?, ?, ?)", - [sqliteData.id, sqliteData.name, sqliteData.created_at] - ) - - // Add to upload queue - await uploadQueue.enqueue(mutation) - } - }) -) -``` 
- ---- - -## Real Examples from Other Integrations - -All existing integrations follow this pattern. Let me show you: - -### Example 1: Manual Sync (packages/query-db-collection/src/manual-sync.ts) - -```typescript -// Line 145-150 -case 'insert': { - const resolved = ctx.collection.validateData(op.data, 'insert') - ctx.write({ - type: 'insert', - value: resolved // TOutput (with Date objects if schema transforms) - }) - break -} -``` - -The manual sync validates data to get TOutput, then writes it to the collection. - -### Example 2: RxDB Integration (packages/rxdb-db-collection/src/rxdb.ts) - -```typescript -// Line 189-191 -write({ - type: 'insert', - value: stripRxdbFields(clone(d)) // Application-level objects -}) -``` - -RxDB handles its own serialization internally. By the time it reaches `write()`, it's already in application types. - -### Example 3: Schema Validation Test (packages/db/tests/collection-schema.test.ts) - -This test demonstrates exactly what you want to do: - -```typescript -// Line 14-43 -const userSchema = z.object({ - id: z.string(), - name: z.string(), - email: z.string().email(), - created_at: z.string().transform((val) => new Date(val)), // string β†’ Date - updated_at: z.string().transform((val) => new Date(val)), -}) - -const collection = createCollection({ - getKey: (item) => item.id, - schema: userSchema, - sync: { sync: () => {} }, -}) - -// Test insert validation -const insertData = { - id: '1', - name: 'John Doe', - email: 'john@example.com', - created_at: '2023-01-01T00:00:00.000Z', // Input: string - updated_at: '2023-01-01T00:00:00.000Z', -} - -const validatedInsert = collection.validateData(insertData, 'insert') - -// Verify that the data has been transformed -expect(validatedInsert.created_at).toBeInstanceOf(Date) // βœ… It's a Date! -expect(validatedInsert.updated_at).toBeInstanceOf(Date) -``` - -The schema successfully transforms strings to Dates, and that's what gets stored in the collection. 
- ---- - -## Addressing Your Specific Concerns - -> **"If we want to return a Date when reading, TOutput should be Date."** - -βœ… Correct! TOutput should be Date. - -> **"Developers must provide an ISO string when insertingβ€”this is not ideal, but manageable."** - -βœ… This is actually perfect! Users provide strings (TInput), schema transforms to Date (TOutput). - -```typescript -// User-friendly API -collection.insert({ - id: "1", - created_at: "2023-01-01T00:00:00.000Z" // String is fine -}) - -// Gets transformed to Date automatically -collection.get("1").created_at // Returns: Date object -``` - -> **"Incoming sync data is a string; we need to validate/convert it before writing. The schema can help, but handling validation failures is tricky."** - -βœ… Use `collection.validateData()` before calling `write()`: - -```typescript -sync: ({ write, begin, commit }) => { - begin() - for (const sqliteRow of sqliteData) { - try { - // This transforms string β†’ Date using the schema - const validated = collection.validateData(sqliteRow, 'insert') - write({ type: 'insert', value: validated }) - } catch (error) { - if (error instanceof SchemaValidationError) { - // Handle validation failure - maybe skip this row, log it, etc. - console.error('Invalid data from SQLite:', error.issues) - continue - } - throw error - } - } - commit() -} -``` - -> **"The main blocker is PendingMutations: we can't directly write a Date (or other arbitrary types) from the mutation to SQLite"** - -βœ… This is where you serialize! 
PendingMutations have Date objects, but you serialize when writing to SQLite: - -```typescript -onInsert: async ({ transaction }) => { - const mutation = transaction.mutations[0] - const item = mutation.modified // TOutput: { created_at: Date } - - // Serialize just for SQLite - const sqliteRow = { - ...item, - created_at: item.created_at.toISOString() // Date β†’ string - } - - await db.execute("INSERT INTO ...", sqliteRow) -} -``` - -> **"The schema doesn't convert it back to TInput."** - -βœ… Correct - and it doesn't need to! **You handle serialization in your integration layer**, not in the schema. The schema is for user-facing transformations, not storage serialization. - ---- - -## Recommended Architecture - -Here's how I'd structure your PowerSync integration: - -```typescript -// Helper function to serialize TOutput β†’ SQLite -function serializeForSQLite(item: TOutput): SQLiteRow { - return { - ...item, - created_at: item.created_at instanceof Date - ? item.created_at.toISOString() - : item.created_at, - // Handle other type conversions as needed - } -} - -// Helper function to deserialize SQLite β†’ TOutput -function deserializeFromSQLite(row: SQLiteRow, collection: Collection): TOutput { - // Use the collection's schema to transform - return collection.validateData(row, 'insert') -} - -export function powerSyncCollectionOptions(config) { - return { - ...config, - - sync: { - sync: ({ write, begin, commit, collection }) => { - // Read from SQLite - const rows = config.database.execute(...) 
- - begin() - for (const row of rows) { - // Deserialize: SQLite β†’ TOutput - const deserialized = deserializeFromSQLite(row, collection) - write({ type: 'insert', value: deserialized }) - } - commit() - } - }, - - onInsert: async ({ transaction }) => { - const item = transaction.mutations[0].modified // TOutput - - // Serialize: TOutput β†’ SQLite - const sqliteRow = serializeForSQLite(item) - - // Write to SQLite - await config.database.execute( - "INSERT INTO ...", - sqliteRow - ) - }, - - // Similar for onUpdate, onDelete - } -} -``` - ---- - -## Summary - -You asked: -> "Let me know if you have suggestions/feedback or if I've misunderstood any part of the TanStackDB schema handling!" - -**Key points:** - -1. βœ… **TOutput should be rich JavaScript types** (Date, etc.) - this is what users see -2. βœ… **TInput is what users provide** for mutations (can be strings that transform to Date) -3. βœ… **Your integration handles serialization**, not the schema: - - When syncing TO collection: `SQLite types` β†’ (deserialize) β†’ `TOutput` β†’ `write()` - - When persisting FROM collection: `mutation.modified` (TOutput) β†’ (serialize) β†’ `SQLite types` -4. βœ… **PendingMutations store TOutput** - you serialize when writing to SQLite -5. βœ… **Use `collection.validateData()`** to transform SQLite data before calling `write()` - -The limitation you described is self-imposed! You **can** support arbitrary TOutput transformations - you just need to handle serialization in your PowerSync adapter layer, similar to how RxDB and Electric do it. 
- ---- - -## Example: Full Date Support - -Here's a complete example showing Date support: - -```typescript -// User-friendly schema -const schema = z.object({ - id: z.string(), - name: z.string(), - created_at: z.string().transform(val => new Date(val)), // TInput: string, TOutput: Date - updated_at: z.string().transform(val => new Date(val)), -}) - -const collection = createCollection( - powerSyncCollectionOptions({ - database: powerSyncDB, - table: APP_SCHEMA.props.documents, - schema, - - sync: { - sync: ({ write, begin, commit, collection }) => { - // Sync FROM SQLite - powerSyncDB.watch('documents', (changes) => { - begin() - for (const change of changes) { - // SQLite row: { id: "1", created_at: "2023-01-01T00:00:00.000Z" } - // validateData transforms string β†’ Date - const transformed = collection.validateData(change, 'insert') - // Now: { id: "1", created_at: Date } - - write({ type: 'insert', value: transformed }) - } - commit() - }) - } - }, - - onInsert: async ({ transaction }) => { - const item = transaction.mutations[0].modified - // item.created_at is a Date here (TOutput) - - // Serialize for SQLite - const sqliteRow = { - ...item, - created_at: item.created_at.toISOString(), // Date β†’ string - updated_at: item.updated_at.toISOString(), - } - - // Write to SQLite - await powerSyncDB.execute( - "INSERT INTO documents (id, name, created_at, updated_at) VALUES (?, ?, ?, ?)", - [sqliteRow.id, sqliteRow.name, sqliteRow.created_at, sqliteRow.updated_at] - ) - } - }) -) - -// Users get a great API: -collection.insert({ - id: "1", - name: "My Doc", - created_at: "2023-01-01T00:00:00.000Z" // String input is fine -}) - -// Reads return Date objects: -const doc = collection.get("1") -console.log(doc.created_at instanceof Date) // true βœ… -console.log(doc.created_at.getFullYear()) // 2023 -``` - ---- - -Hope this clarifies things! 
Your POC looks great, and with this approach you can provide the best DX (Date objects, custom types) while still syncing through SQLite. Let me know if you have questions! diff --git a/schema-documentation-plan-v2.md b/schema-documentation-plan-v2.md deleted file mode 100644 index a78b5a8db..000000000 --- a/schema-documentation-plan-v2.md +++ /dev/null @@ -1,596 +0,0 @@ -# Schema Documentation Plan - v2 (Refined After Investigation) - -## Investigation Summary - -### What's Currently Documented - -**1. overview.md (lines 144-154)** -- βœ… Mentions schemas are optional but recommended -- βœ… Lists supported schema libraries (Zod, Effect) -- βœ… Says schemas do "client-side validation" -- ❌ No explanation of TInput/TOutput -- ❌ No transformation examples -- ❌ No mention of data flow - -**2. mutations.md (lines 495-560)** -- βœ… Shows Zod for **action parameter validation** -- ❌ Does NOT cover TInput/TOutput for collection data -- ❌ Does NOT show schema transformations - -**3. error-handling.md (lines 25-46)** -- βœ… Shows SchemaValidationError basics -- βœ… Shows error properties (type, issues, message) -- ❌ No explanation of when/why validation happens -- ❌ No transformation examples - -**4. live-queries.md** -- βœ… Mentions schema is optional for createLiveQueryCollection (line 95) -- ❌ No explanation of schema behavior -- ❌ Only says "result types are automatically inferred" - -**5. collection-options-creator.md (lines 174-220)** -- βœ… Has section on "Data Parsing and Type Conversion" -- βœ… Shows integration-specific `parse`/`serialize` functions -- ❌ This is DIFFERENT from schema validation -- ❌ Doesn't explain the relationship between the two - -### Key Finding: Two Distinct Mechanisms - -From examples and codebase, there are **TWO separate type conversion mechanisms**: - -1. 
**Integration-Level Parsing** (e.g., TrailBase's `parse/serialize`, Electric's `parser`) - - Purpose: Convert between storage format and in-memory format - - Layer: Sync layer (happens during `write()`) - - Example: Unix timestamp β†’ Date, WKB β†’ GeoJSON - - Used by: Integration authors - -2. **Schema Validation/Transformation** (the `schema` property) - - Purpose: Validate user input and transform TInput β†’ TOutput - - Layer: Mutation layer (happens during `insert()`/`update()`) - - Example: ISO string β†’ Date, applying defaults, validation - - Used by: App developers - -**These are complementary but serve different purposes!** - ---- - -## Proposed Documentation Strategy - -### Phase 1: Create New Comprehensive Guide - -**File:** `docs/guides/schemas.md` - -**Why a new guide?** -- Schemas affect mutations, queries, sync, AND error handling -- Content is substantial (~1500 lines with examples) -- Needs to serve both app developers AND integration authors -- Deserves prominent discoverability - -**Target Audiences:** -1. **App Developers** (80% of content) - - Understanding TInput/TOutput - - Using transformations - - Handling updates - - Error handling - - Best practices - -2. **Integration Authors** (20% of content) - - How schemas interact with sync - - When to use integration parsing vs schemas - - Calling `validateData()` correctly - - Handling serialization - ---- - -### Phase 2: Update Existing Docs - -#### 2.1 Update `overview.md` (lines 144-154) - -**Current:** -```markdown -#### Collection schemas - -All collections optionally (though strongly recommended) support adding a `schema`. - -If provided, this must be a [Standard Schema](https://standardschema.dev) compatible schema instance, such as a [Zod](https://zod.dev) or [Effect](https://effect.website/docs/schema/introduction/) schema. - -The collection will use the schema to do client-side validation of optimistic mutations. 
-``` - -**Replace with:** -```markdown -#### Collection schemas - -All collections optionally (though strongly recommended) support adding a `schema`. - -If provided, this must be a [Standard Schema](https://standardschema.dev) compatible schema instance, such as [Zod](https://zod.dev), [Valibot](https://valibot.dev), [ArkType](https://arktype.io), or [Effect](https://effect.website/docs/schema/introduction/). - -**What schemas do:** - -1. **Runtime validation** - Ensures data meets your constraints before entering the collection -2. **Type transformations** - Convert input types to rich output types (e.g., string β†’ Date) -3. **Default values** - Automatically populate missing fields -4. **Type safety** - Infer TypeScript types from your schema - -**Example:** -```typescript -const todoSchema = z.object({ - id: z.string(), - text: z.string(), - completed: z.boolean().default(false), - created_at: z.string().transform(val => new Date(val)), // string β†’ Date - priority: z.number().default(0) -}) - -const collection = createCollection( - queryCollectionOptions({ - schema: todoSchema, - // ... - }) -) - -// Users provide simple inputs -collection.insert({ - id: "1", - text: "Buy groceries", - created_at: "2024-01-01T00:00:00Z" // string - // completed and priority filled automatically -}) - -// Collection stores and returns rich types -const todo = collection.get("1") -console.log(todo.created_at.getFullYear()) // It's a Date! -console.log(todo.completed) // false (default) -``` - -The collection will use the schema for its type inference. If you provide a schema, you cannot also pass an explicit type parameter (e.g., `createCollection()`). - -**Learn more:** See the [Schemas guide](../guides/schemas.md) for comprehensive documentation on schema validation, type transformations, and best practices. 
-``` - -#### 2.2 Add to `mutations.md` (after Operation Handlers section, ~line 394) - -Add a new section: - -```markdown -### Schema Validation in Mutation Handlers - -When a schema is configured, TanStack DB automatically validates and transforms data during mutations. The mutation handlers receive the **transformed data** (TOutput), not the raw input. - -```typescript -const todoSchema = z.object({ - id: z.string(), - text: z.string(), - created_at: z.string().transform(val => new Date(val)) // TInput: string, TOutput: Date -}) - -const collection = createCollection({ - schema: todoSchema, - onInsert: async ({ transaction }) => { - const item = transaction.mutations[0].modified - - // item.created_at is already a Date object (TOutput) - console.log(item.created_at instanceof Date) // true - - // If your API needs a string, serialize it - await api.todos.create({ - ...item, - created_at: item.created_at.toISOString() // Date β†’ string - }) - } -}) - -// User provides string (TInput) -collection.insert({ - id: "1", - text: "Task", - created_at: "2024-01-01T00:00:00Z" -}) -``` - -**Key points:** -- Schema validation happens **before** mutation handlers are called -- Handlers receive **TOutput** (transformed data) -- If your backend needs a different format, serialize in the handler -- Schema validation errors throw `SchemaValidationError` before handlers run - -For comprehensive documentation on schema validation and transformations, see the [Schemas guide](./schemas.md). -``` - -#### 2.3 Update `error-handling.md` (lines 25-46) - -**Current section is good but add after line 46:** - -```markdown -**When schema validation occurs:** - -Schema validation happens at the **collection boundary** when data enters or is modified: - -1. **During inserts** - When `collection.insert()` is called -2. **During updates** - When `collection.update()` is called -3. 
**During sync writes** - When integration calls `collection.validateData()` - -The schema transforms **TInput** (user-provided data) into **TOutput** (stored data): - -```typescript -const schema = z.object({ - created_at: z.string().transform(val => new Date(val)) - // TInput: string, TOutput: Date -}) - -// Validation happens here ↓ -collection.insert({ - created_at: "2024-01-01" // TInput: string -}) -// If successful, stores: { created_at: Date } // TOutput: Date -``` - -For more details on schema validation and type transformations, see the [Schemas guide](./schemas.md). -``` - -#### 2.4 Update `collection-options-creator.md` (after line 220) - -**Add a new section after "Data Parsing and Type Conversion":** - -```markdown -### Integration Parsing vs Schema Validation - -Integration authors need to understand the **two distinct type conversion mechanisms**: - -#### 1. Integration-Level Parsing (`parse`/`serialize` or `parser`) - -This is **your responsibility** as an integration author. It converts between storage format and in-memory format. - -```typescript -// Example: TrailBase stores timestamps as Unix seconds -export function trailbaseCollectionOptions(config) { - return { - parse: { - created_at: (ts: number) => new Date(ts * 1000) // Unix timestamp β†’ Date - }, - serialize: { - created_at: (date: Date) => Math.floor(date.valueOf() / 1000) // Date β†’ Unix timestamp - }, - // This happens during sync write() - } -} -``` - -**When to use:** When your storage layer uses different types than TanStack DB (e.g., Unix timestamps, WKB geometry, JSON strings). - -**Where it happens:** In the sync layer, during `write()` operations. - -#### 2. Schema Validation (the `schema` property) - -This is **the user's choice**. They can optionally provide a schema that validates and transforms data during mutations. 
- -```typescript -// User-defined schema -const todoSchema = z.object({ - id: z.string(), - created_at: z.string().transform(val => new Date(val)) // string β†’ Date -}) - -const collection = createCollection( - myCollectionOptions({ - schema: todoSchema, // User provides this - // ... - }) -) -``` - -**When to use (for users):** For input validation, transformations, and defaults during mutations. - -**Where it happens:** At the mutation layer, during `insert()`/`update()`. - -#### How They Work Together - -```typescript -// 1. User calls insert with TInput -collection.insert({ - id: "1", - created_at: "2024-01-01T00:00:00Z" // string -}) - -// 2. Schema validates/transforms: string β†’ Date (if schema is provided) -// Result: { id: "1", created_at: Date } // TOutput - -// 3. Your onInsert handler receives TOutput -onInsert: async ({ transaction }) => { - const item = transaction.mutations[0].modified // { created_at: Date } - - // 4. Serialize for your storage layer - const storageFormat = { - ...item, - created_at: Math.floor(item.created_at.valueOf() / 1000) // Date β†’ Unix timestamp - } - - // 5. Write to storage - await storage.write(storageFormat) -} - -// 6. When syncing back FROM storage: -sync: ({ write, collection }) => { - const storageRow = { id: "1", created_at: 1704067200 } // Unix timestamp - - // 7. Parse from storage format - const parsed = { - ...storageRow, - created_at: new Date(storageRow.created_at * 1000) // Unix β†’ Date - } - - // 8. Optionally validate with schema - const validated = collection.validateData(parsed, 'insert') - - // 9. Write to collection as TOutput - write({ type: 'insert', value: validated }) -} -``` - -#### Best Practices for Integration Authors - -1. **Always call `collection.validateData()`** when syncing data INTO the collection -2. **Serialize in mutation handlers** when persisting data FROM the collection -3. **Don't constrain user schemas** - let users define rich TOutput types -4. 
**Document your parsing requirements** - explain what formats your storage uses -5. **Provide good TypeScript types** - use generics to support user schemas - -**Example: Calling validateData() during sync** - -```typescript -export function myCollectionOptions(config) { - return { - sync: { - sync: ({ write, begin, commit, collection }) => { - // Read from your storage - const storageData = await fetchFromStorage() - - begin() - for (const row of storageData) { - // Parse from storage format - const parsed = parseFromStorageFormat(row) - - // Validate and transform using user's schema (if provided) - const validated = collection.validateData(parsed, 'insert') - - // Write TOutput to collection - write({ type: 'insert', value: validated }) - } - commit() - } - }, - - onInsert: async ({ transaction }) => { - const items = transaction.mutations.map(m => m.modified) // TOutput - - // Serialize for your storage - const serialized = items.map(item => serializeForStorage(item)) - - // Write to storage - await storage.bulkWrite(serialized) - } - } -} -``` - -For comprehensive documentation on schemas from a user perspective, see the [Schemas guide](./schemas.md). -``` - ---- - -### Phase 3: Create the New Schemas Guide - -**File:** `docs/guides/schemas.md` - -**Structure (detailed outline):** - -#### 1. Introduction (5 min read) -- What schemas do in TanStack DB -- Why use them (type safety, validation, transformations) -- StandardSchema compatibility - -#### 2. Core Concepts: TInput vs TOutput (5 min) -- Clear explanation with diagrams -- Data flow through the system -- Where validation happens - -```typescript -const schema = z.object({ - created_at: z.string().transform(val => new Date(val)) - // TInput: string (what users provide) - // TOutput: Date (what's stored and returned) -}) -``` - -#### 3. 
Data Flow Diagram -Visual showing the journey from user input β†’ validation β†’ storage β†’ queries - -``` -User Input (TInput) - ↓ -collection.insert() - ↓ -Schema Validation & Transformation - ↓ -Collection Storage (TOutput) - ↓ -Queries & Reads (TOutput) -``` - -#### 4. Transformation Examples (10 min) - -**4.1 Type Conversions** -- String β†’ Date -- Number β†’ Enum -- JSON string β†’ Object -- Computed fields - -**4.2 Default Values** -- `.default()` with literals -- `.default()` with functions -- Arrays and objects - -**4.3 Validation & Constraints** -- `.min()`, `.max()`, `.email()`, etc. -- Custom validation -- Error messages - -#### 5. Handling Updates (10 min) - -**The Challenge:** Existing data is TOutput, but users provide TInput - -**Solution:** Union types - -```typescript -const schema = z.object({ - created_at: z.union([ - z.string(), // New input - z.date() // Existing data - ]).transform(val => typeof val === 'string' ? new Date(val) : val) -}) -``` - -#### 6. For App Developers (15 min) - -**6.1 When to Use Schemas** -- Always recommended -- Benefits list - -**6.2 Common Patterns** -- Date handling (with and without unions) -- Timestamps with defaults -- Type-safe enums -- Nullable/optional fields -- Arrays with defaults - -**6.3 Best Practices** -- Prefer rich TOutput types -- Use unions for updates -- Keep transformations simple -- Consider performance - -**6.4 Complete Example** -Full working todo app with schema - -#### 7. 
For Integration Authors (10 min) - -**7.1 Understanding the Boundary** -- Schema validation vs integration parsing -- When each happens -- How they work together - -**7.2 Calling validateData()** -```typescript -// When syncing TO collection -const validated = collection.validateData(row, 'insert') -write({ type: 'insert', value: validated }) -``` - -**7.3 Serializing in Handlers** -```typescript -// When persisting FROM collection -onInsert: async ({ transaction }) => { - const item = transaction.mutations[0].modified // TOutput - const serialized = serializeForStorage(item) - await storage.write(serialized) -} -``` - -**7.4 Best Practices** -- Always call validateData() during sync -- Don't constrain user schemas to storage types -- Handle validation errors gracefully - -#### 8. Error Handling (5 min) -- SchemaValidationError structure -- Catching and displaying errors -- Handling invalid sync data - -#### 9. Performance Considerations (3 min) -- When validation happens -- Cost of complex transformations -- Optimization tips - -#### 10. Complete Working Examples (10 min) -- Todo app with rich types -- E-commerce product with computed fields -- Multi-collection transaction - -#### 11. Related Topics -- Links to mutations.md, error-handling.md, collection-options-creator.md -- Link to StandardSchema spec - ---- - -## Implementation Order - -1. βœ… **Create schemas.md** (the comprehensive guide) -2. βœ… **Update overview.md** (expand collection schemas section, add example) -3. βœ… **Update mutations.md** (add schema validation section) -4. βœ… **Update error-handling.md** (add "when schema validation occurs") -5. βœ… **Update collection-options-creator.md** (add "Integration Parsing vs Schema Validation" section) -6. ⏭️ **Update navigation** (add schemas.md to docs navigation/sidebar) -7. ⏭️ **Review examples** (ensure they follow best practices) -8. 
⏭️ **Get feedback** (from integration authors and community) - ---- - -## Success Criteria - -After implementation, developers should be able to: - -### App Developers -1. βœ… Explain TInput vs TOutput -2. βœ… Use `.transform()` to convert types -3. βœ… Apply default values with `.default()` -4. βœ… Handle both new input and existing data in update schemas -5. βœ… Understand when schema validation happens -6. βœ… Debug SchemaValidationError - -### Integration Authors -7. βœ… Distinguish between integration parsing and schema validation -8. βœ… Know when to call `collection.validateData()` -9. βœ… Understand where to serialize/deserialize -10. βœ… Avoid constraining user schemas to storage types - ---- - -## Key Insights from Investigation - -1. **Two Mechanisms Exist:** Integration parsing (storage format) and schema validation (user input) serve different purposes - -2. **Real-World Usage:** Examples show: - - Union types for handling both string and Date - - Integration-specific parsing (Electric `parser`, TrailBase `parse/serialize`) - - Schemas for validation and defaults - -3. **Current Gap:** No documentation explains: - - TInput vs TOutput concept - - How the two mechanisms relate - - When to use which approach - - Best practices for either audience - -4. **Documentation Spread:** Schema-related content currently in 4 docs but none comprehensive - -5. **PowerSync Confusion:** Their question proves the need - they didn't understand: - - TOutput should be rich types - - Integration layer handles serialization - - Schema validation vs sync parsing - ---- - -## Next Steps - -1. Get approval on this refined plan -2. Implement schemas.md with full content -3. Make targeted updates to existing docs -4. Add navigation links -5. Review with integration authors (Electric, PowerSync, TrailBase teams) -6. 
Collect feedback and iterate - -This approach provides: -- βœ… Comprehensive coverage in one place (schemas.md) -- βœ… Targeted updates to existing docs (not overwhelming) -- βœ… Clear distinction between app dev and integration author concerns -- βœ… Addresses PowerSync-type confusion directly -- βœ… Builds on existing example patterns diff --git a/schema-documentation-proposal.md b/schema-documentation-proposal.md deleted file mode 100644 index fde7946e8..000000000 --- a/schema-documentation-proposal.md +++ /dev/null @@ -1,650 +0,0 @@ -# Schema Documentation Proposal - -## Current State Analysis - -### What's Currently Documented - -**overview.md (lines 144-154):** -- Basic mention that collections support schemas (StandardSchema compatible) -- States schemas are used for "client-side validation of optimistic mutations" -- Notes you can't provide both a schema and an explicit type -- Shows basic usage: `schema: todoSchema` - -**mutations.md (lines 495-560):** -- Shows schema validation for **action parameters** (validating inputs to `createOptimisticAction`) -- Does NOT cover TInput/TOutput transformations for collection data - -### Critical Gaps - -1. ❌ **No explanation of TInput vs TOutput** - The core concept is missing -2. ❌ **No transformation examples** - No `.transform()` usage shown -3. ❌ **No default value examples** - No `.default()` usage shown -4. ❌ **No data flow explanation** - Where does validation happen in the system? -5. ❌ **No type conversion patterns** - Common patterns like Date handling, enums, computed fields -6. ❌ **No integration guidance** - How integrations should handle serialization/deserialization -7. 
❌ **No best practices** - When to use schemas, what to transform, performance considerations - ---- - -## Proposed Solution: New Dedicated Guide - -**Create: `docs/guides/schemas.md`** - -This deserves its own guide because: -- It's a substantial topic spanning mutations, queries, and sync -- It's relevant to all collection types -- It affects integration authors and app developers differently -- Discoverability is important for this foundational concept - ---- - -## Proposed Content Structure - -### 1. Introduction & Core Concepts (5-10 min read) - -**Title:** "Schema Validation and Type Transformations" - -**Opening:** -- What schemas do in TanStack DB -- Why you should use them (type safety, runtime validation, data transformation) -- Overview of StandardSchema compatibility (Zod, Valibot, ArkType, Effect) - -**Core Concept: TInput vs TOutput** -```typescript -// Example showing the concept clearly -const todoSchema = z.object({ - id: z.string(), - text: z.string(), - completed: z.boolean(), - created_at: z.string().transform(val => new Date(val)), // TInput: string, TOutput: Date - priority: z.number().default(0) // TInput: optional, TOutput: always present -}) - -// TInput = { id: string, text: string, completed: boolean, created_at: string, priority?: number } -// TOutput = { id: string, text: string, completed: boolean, created_at: Date, priority: number } -``` - -**Explain:** -- TInput: What users provide when calling `insert()` or `update()` -- TOutput: What gets stored in the collection and returned from queries -- Schema transforms TInput β†’ TOutput at the collection boundary - ---- - -### 2. 
The Data Flow (visual diagram + explanation) - -**Include a diagram showing:** - -``` -β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” -β”‚ User's Code β”‚ -β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ - β”‚ TInput (strings, partial data) - ↓ -β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” -β”‚ collection.insert(data) β”‚ -β”‚ ↓ β”‚ -β”‚ Schema Validation & Transformation β”‚ -β”‚ (TInput β†’ TOutput) β”‚ -β”‚ ↓ β”‚ -β”‚ - Validate types and constraints β”‚ -β”‚ - Apply transformations (.transform()) β”‚ -β”‚ - Apply defaults (.default()) β”‚ -β”‚ - Convert types (string β†’ Date, etc.) 
β”‚ -β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ - β”‚ TOutput (Dates, complete data) - ↓ -β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” -β”‚ Collection Storage β”‚ -β”‚ (stores as TOutput) β”‚ -β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ - β”‚ TOutput - ↓ -β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” -β”‚ Live Queries / Reads β”‚ -β”‚ (returns TOutput) β”‚ -β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ -``` - -**Key Points:** -1. Validation happens at the **collection boundary** (during `insert()`, `update()`, and sync writes) -2. **Everything stored in the collection is TOutput** -3. **Everything read from the collection is TOutput** -4. PendingMutations also store TOutput - ---- - -### 3. Transformation Examples - -**3.1 Type Conversions** - -**Example: String to Date** -```typescript -const eventSchema = z.object({ - id: z.string(), - name: z.string(), - start_time: z.string().transform(val => new Date(val)) -}) - -const collection = createCollection({ - schema: eventSchema, - // ... 
-}) - -// User provides string -collection.insert({ - id: "1", - name: "Conference", - start_time: "2024-01-01T10:00:00Z" // TInput: string -}) - -// Collection stores Date -const event = collection.get("1") -console.log(event.start_time.getFullYear()) // TOutput: Date -``` - -**Example: Number/String to Enum** -```typescript -const statusSchema = z.object({ - id: z.string(), - status: z.union([ - z.literal('draft'), - z.literal('published'), - z.literal('archived') - ]).default('draft') -}) -``` - -**Example: Computed Fields** -```typescript -const userSchema = z.object({ - id: z.string(), - first_name: z.string(), - last_name: z.string(), -}).transform(data => ({ - ...data, - full_name: `${data.first_name} ${data.last_name}` // Computed during insert -})) -``` - -**3.2 Default Values** - -```typescript -const todoSchema = z.object({ - id: z.string(), - text: z.string(), - completed: z.boolean().default(false), - created_at: z.date().default(() => new Date()), - priority: z.number().default(0), - tags: z.array(z.string()).default([]) -}) - -// User can omit fields with defaults -collection.insert({ - id: "1", - text: "Buy groceries" - // completed, created_at, priority, tags will be added automatically -}) -``` - -**3.3 Input Validation & Constraints** - -```typescript -const productSchema = z.object({ - id: z.string(), - name: z.string().min(3, "Name must be at least 3 characters"), - price: z.number().positive("Price must be positive"), - email: z.string().email("Invalid email address"), - age: z.number().int().min(18).max(120) -}) - -// This will throw SchemaValidationError -collection.insert({ - id: "1", - name: "A", // Too short - price: -10, // Negative - email: "not-an-email", // Invalid format - age: 200 // Out of range -}) -``` - ---- - -### 4. Handling Updates with Schemas - -**The Challenge with Updates:** -When updating, existing data is already TOutput (e.g., Date objects), but users provide TInput (strings). You need to handle both. 
- -**Pattern: Union Types** -```typescript -const todoSchema = z.object({ - id: z.string(), - text: z.string(), - created_at: z.union([z.string(), z.date()]) - .transform(val => typeof val === 'string' ? new Date(val) : val), - updated_at: z.union([z.string(), z.date()]) - .transform(val => typeof val === 'string' ? new Date(val) : val) -}) - -const collection = createCollection({ - schema: todoSchema, - // ... -}) - -// Insert with string (TInput) -collection.insert({ - id: "1", - text: "Task", - created_at: "2024-01-01T00:00:00Z" // string -}) - -// Update merges with existing data (which has Date) -collection.update("1", (draft) => { - draft.updated_at = new Date() // Can use Date OR string - // created_at is already a Date from the insert -}) -``` - -**Why This Works:** -1. During insert: String input β†’ validated β†’ transformed to Date β†’ stored as TOutput -2. During update: Schema merges partial update with existing TOutput, validates the merged result -3. Union type accepts both string (from user input) and Date (from existing data) - ---- - -### 5. Integration-Specific Guidance - -**5.1 For App Developers** - -**When to Use Schemas:** -- βœ… Always use schemas when available - they provide type safety and runtime validation -- βœ… Define rich TOutput types (Date objects, enums, computed fields) -- βœ… Let the schema handle transformations rather than manual conversion - -**Example: Rich Types in TOutput** -```typescript -// Good: Let users provide strings, store as Date -const schema = z.object({ - created_at: z.string().transform(val => new Date(val)) -}) - -// Bad: Forcing users to provide Date objects -const schema = z.object({ - created_at: z.date() // Users must call `new Date()` themselves -}) -``` - -**5.2 For Integration Authors (Electric, PowerSync, RxDB, etc.)** - -**Key Principle:** Your integration layer handles serialization between storage format and TOutput. - -```typescript -// Integration Flow - -// 1. 
Syncing FROM storage TO TanStack DB -sync: ({ write, collection }) => { - // Read from storage (e.g., SQLite) - const sqliteRow = { id: "1", created_at: "2024-01-01T00:00:00Z" } - - // Deserialize using schema: SQLite format β†’ TOutput - const transformed = collection.validateData(sqliteRow, 'insert') - // Result: { id: "1", created_at: Date } - - // Write TOutput to collection - write({ type: 'insert', value: transformed }) -} - -// 2. Persisting FROM TanStack DB TO storage -onInsert: async ({ transaction }) => { - const item = transaction.mutations[0].modified // This is TOutput - - // Serialize: TOutput β†’ storage format - const sqliteRow = { - ...item, - created_at: item.created_at.toISOString() // Date β†’ string - } - - // Write to storage - await db.execute("INSERT INTO ...", sqliteRow) -} -``` - -**Important:** -- βœ… Call `collection.validateData()` when syncing data INTO the collection -- βœ… Manually serialize TOutput when persisting data FROM the collection -- ❌ Don't constrain TOutput to match storage types -- ❌ Don't skip schema validation during sync - ---- - -### 6. Common Patterns & Best Practices - -**6.1 Date Handling** -```typescript -// Pattern: Accept strings, store as Date -const schema = z.object({ - timestamp: z.string().transform(val => new Date(val)) -}) - -// Pattern: Accept both for updates -const schema = z.object({ - timestamp: z.union([z.string(), z.date()]) - .transform(val => typeof val === 'string' ? 
new Date(val) : val) -}) -``` - -**6.2 Timestamps with Defaults** -```typescript -const schema = z.object({ - id: z.string(), - created_at: z.date().default(() => new Date()), - updated_at: z.date().default(() => new Date()) -}) - -// Usage -collection.insert({ - id: "1" - // timestamps added automatically -}) -``` - -**6.3 Type-Safe Enums** -```typescript -const schema = z.object({ - status: z.enum(['draft', 'published', 'archived']).default('draft') -}) -``` - -**6.4 Nullable/Optional Fields** -```typescript -const schema = z.object({ - id: z.string(), - notes: z.string().optional(), // TInput: string | undefined, TOutput: string | undefined - deleted_at: z.date().nullable().default(null) // TInput: Date | null, TOutput: Date | null -}) -``` - -**6.5 Arrays with Defaults** -```typescript -const schema = z.object({ - tags: z.array(z.string()).default([]), - metadata: z.record(z.unknown()).default({}) -}) -``` - ---- - -### 7. Performance Considerations - -**Schema Validation Cost:** -- Schema validation runs on every `insert()` and `update()` -- Also runs during sync when calling `validateData()` -- For high-frequency updates, consider simpler schemas - -**Optimization Tips:** -```typescript -// Avoid expensive transforms in hot paths -const schema = z.object({ - id: z.string(), - data: z.string().transform(val => JSON.parse(val)) // Can be slow -}) - -// Better: Parse only when needed -const schema = z.object({ - id: z.string(), - data: z.string() // Store as string, parse in components -}) -``` - ---- - -### 8. 
Error Handling - -**Schema Validation Errors:** -```typescript -import { SchemaValidationError } from '@tanstack/db' - -try { - collection.insert({ - id: "1", - email: "invalid-email", - age: -5 - }) -} catch (error) { - if (error instanceof SchemaValidationError) { - console.log(error.type) // 'insert' or 'update' - console.log(error.issues) // Array of validation issues - - error.issues.forEach(issue => { - console.log(issue.path) // ['email'] or ['age'] - console.log(issue.message) // "Invalid email address" - }) - } -} -``` - -**In Sync Handlers:** -```typescript -sync: ({ write, begin, commit }) => { - begin() - for (const row of sqliteData) { - try { - const validated = collection.validateData(row, 'insert') - write({ type: 'insert', value: validated }) - } catch (error) { - if (error instanceof SchemaValidationError) { - // Handle invalid data from storage - console.error('Invalid data in storage:', error.issues) - continue // Skip this row - } - throw error - } - } - commit() -} -``` - ---- - -### 9. 
Complete Working Examples - -**Example 1: Todo App with Rich Types** -```typescript -const todoSchema = z.object({ - id: z.string(), - text: z.string().min(1, "Todo text cannot be empty"), - completed: z.boolean().default(false), - priority: z.enum(['low', 'medium', 'high']).default('medium'), - due_date: z.string().transform(val => new Date(val)).optional(), - created_at: z.date().default(() => new Date()), - tags: z.array(z.string()).default([]) -}) - -const todoCollection = createCollection( - queryCollectionOptions({ - queryKey: ['todos'], - queryFn: async () => api.todos.getAll(), - getKey: (item) => item.id, - schema: todoSchema, - onInsert: async ({ transaction }) => { - const todo = transaction.mutations[0].modified // TOutput - - // Serialize for API - await api.todos.create({ - ...todo, - due_date: todo.due_date?.toISOString(), // Date β†’ string - created_at: todo.created_at.toISOString() - }) - } - }) -) - -// Usage - users provide simple inputs -todoCollection.insert({ - id: crypto.randomUUID(), - text: "Buy groceries", - due_date: "2024-12-31T23:59:59Z" - // completed, priority, created_at, tags filled automatically -}) - -// Reading returns rich types -const todo = todoCollection.get(id) -console.log(todo.due_date.getTime()) // It's a Date! 
-console.log(todo.priority) // Type-safe enum -``` - -**Example 2: E-commerce Product with Computed Fields** -```typescript -const productSchema = z.object({ - id: z.string(), - name: z.string(), - base_price: z.number().positive(), - tax_rate: z.number().min(0).max(1).default(0.1), - discount_percent: z.number().min(0).max(100).default(0) -}).transform(data => ({ - ...data, - // Computed field - final_price: data.base_price * (1 + data.tax_rate) * (1 - data.discount_percent / 100) -})) - -// User provides base data -collection.insert({ - id: "1", - name: "Widget", - base_price: 100, - discount_percent: 20 - // tax_rate defaults to 0.1 -}) - -// Reading returns computed field -const product = collection.get("1") -console.log(product.final_price) // 88 (100 * 1.1 * 0.8) -``` - ---- - -### 10. Related Topics - -**See Also:** -- [Mutations Guide](./mutations.md) - Using schemas with mutation handlers -- [Error Handling Guide](./error-handling.md) - Handling SchemaValidationError -- [Creating Collection Options](./collection-options-creator.md) - Integration authors: handling schemas in custom integrations -- [StandardSchema Specification](https://standardschema.dev) - Full schema specification - ---- - -## Changes to Existing Docs - -### 1. Update `overview.md` (lines 144-154) - -**Replace:** -```markdown -#### Collection schemas - -All collections optionally (though strongly recommended) support adding a `schema`. - -If provided, this must be a [Standard Schema](https://standardschema.dev) compatible schema instance, such as a [Zod](https://zod.dev) or [Effect](https://effect.website/docs/schema/introduction/) schema. - -The collection will use the schema to do client-side validation of optimistic mutations. - -The collection will use the schema for its type so if you provide a schema, you can't also pass in an explicit -type (e.g. `createCollection()`). 
-``` - -**With:** -```markdown -#### Collection schemas - -All collections optionally (though strongly recommended) support adding a `schema`. - -If provided, this must be a [Standard Schema](https://standardschema.dev) compatible schema instance, such as [Zod](https://zod.dev), [Valibot](https://valibot.dev), [ArkType](https://arktype.io), or [Effect](https://effect.website/docs/schema/introduction/) schemas. - -Schemas provide three key benefits: - -1. **Runtime validation**: Ensures data meets your constraints before entering the collection -2. **Type transformations**: Convert input types (strings) to rich output types (Date objects) -3. **Default values**: Automatically populate missing fields - -The collection will use the schema for its type, so if you provide a schema, you can't also pass in an explicit -type parameter (e.g., `createCollection()`). - -For a comprehensive guide on schema validation and type transformations, see the [Schemas guide](../guides/schemas.md). -``` - -### 2. Add to `mutations.md` (after line 154) - -Add a note in the mutation handlers section: - -```markdown -> [!TIP] -> Schemas automatically validate and transform data during mutations. For example, you can use schemas to convert string inputs to Date objects. See the [Schemas guide](./schemas.md) for details on schema validation and type transformations. -``` - -### 3. Update `collection-options-creator.md` (after line 66) - -Add a section on schemas: - -```markdown -### 3. 
Schema Handling - -When implementing a collection options creator for a sync engine, you must handle schema transformations correctly: - -```typescript -// When syncing FROM storage TO TanStack DB -sync: ({ write, collection }) => { - const storageData = await fetchFromStorage() - - // Deserialize: storage format β†’ TOutput - const transformed = collection.validateData(storageData, 'insert') - - // Write TOutput to collection - write({ type: 'insert', value: transformed }) -} - -// When persisting FROM TanStack DB TO storage -onInsert: async ({ transaction }) => { - const item = transaction.mutations[0].modified // TOutput - - // Serialize: TOutput β†’ storage format - const serialized = serializeForStorage(item) - - // Write to storage - await storage.write(serialized) -} -``` - -**Key principles:** -- Your integration layer handles serialization between storage format and TOutput -- Always call `collection.validateData()` when syncing data INTO the collection -- Manually serialize when persisting data FROM the collection to storage -- Don't constrain user schemas to match storage types - -For a comprehensive guide, see [Schemas guide](./schemas.md#integration-specific-guidance). -``` - ---- - -## Implementation Checklist - -- [ ] Create `docs/guides/schemas.md` with the content above -- [ ] Update `overview.md` collection schemas section -- [ ] Add schema tip to `mutations.md` -- [ ] Add schema handling section to `collection-options-creator.md` -- [ ] Add link to schemas guide in docs navigation -- [ ] Review and test all code examples -- [ ] Get feedback from integration authors (Electric, PowerSync, TrailBase teams) - ---- - -## Success Metrics - -After implementation, developers should be able to: - -1. βœ… Explain the difference between TInput and TOutput -2. βœ… Use schema transformations to convert types (e.g., string β†’ Date) -3. βœ… Apply default values in schemas -4. βœ… Handle both input and existing data in update schemas -5. 
βœ… Understand where schema validation happens in the system -6. βœ… (Integration authors) Correctly implement serialization/deserialization - -This should significantly reduce confusion like the PowerSync team experienced. From be17cff524aade73b6c9bed27af11dc62eee094b Mon Sep 17 00:00:00 2001 From: Kyle Mathews Date: Wed, 5 Nov 2025 08:43:14 -0700 Subject: [PATCH 9/9] docs: clarify TInput must be superset of TOutput requirement MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses PR review feedback from samwillis about the critical design principle that TInput must be a superset of TOutput when using transformations. Key improvements: - Add prominent "Critical Design Principle" section explaining why TInput must accept all TOutput values - Clarify that union types are REQUIRED (not optional) for transformations - Add clear ❌/βœ… examples showing what breaks and why - Explain the draft parameter typing issue in collection.update() - Strengthen language in Best Practices from "should" to "must" This makes it clear that when schemas transform type A to type B, you must use z.union([A, B]) to ensure updates work correctly. πŸ€– Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- docs/guides/schemas.md | 62 ++++++++++++++++++++++++++++++++++-------- 1 file changed, 50 insertions(+), 12 deletions(-) diff --git a/docs/guides/schemas.md b/docs/guides/schemas.md index 4d9451741..b51296b43 100644 --- a/docs/guides/schemas.md +++ b/docs/guides/schemas.md @@ -118,6 +118,33 @@ const todoSchema = z.object({ The schema acts as a **boundary** that transforms TInput β†’ TOutput. +### Critical Design Principle: TInput Must Be a Superset of TOutput + +When using transformations, **TInput must accept all values that TOutput contains**. This is essential for updates to work correctly. 
+ +Here's why: when you call `collection.update(id, (draft) => {...})`, the `draft` parameter is typed as `TInput` but contains data that's already been transformed to `TOutput`. For this to work without complex type gymnastics, your schema must accept both the input format AND the output format. + +```typescript +// ❌ BAD: TInput only accepts strings +const schema = z.object({ + created_at: z.string().transform(val => new Date(val)) +}) +// TInput: { created_at: string } +// TOutput: { created_at: Date } +// Problem: draft.created_at is a Date, but TInput only accepts string! + +// βœ… GOOD: TInput accepts both string and Date (superset of TOutput) +const schema = z.object({ + created_at: z.union([z.string(), z.date()]) + .transform(val => typeof val === 'string' ? new Date(val) : val) +}) +// TInput: { created_at: string | Date } +// TOutput: { created_at: Date } +// Success: draft.created_at can be a Date because TInput accepts Date! +``` + +**Rule of thumb:** If your schema transforms type A to type B, use `z.union([A, B])` to ensure TInput accepts both. + ### Why This Matters **All data in your collection is TOutput:** @@ -541,39 +568,43 @@ collection.update("1", (draft) => { }) ``` -### When You Need Union Types +### Accepting Date Input from External Sources -If you're accepting date input from external sources (forms, APIs), you may need to accept both strings and Date objects: +If you're accepting date input from external sources (forms, APIs), you must use union types to accept both strings and Date objects. 
This ensures TInput is a superset of TOutput: ```typescript const eventSchema = z.object({ id: z.string(), name: z.string(), scheduled_for: z.union([ - z.string(), // Accept ISO string from form input - z.date() // Accept Date from existing data or programmatic input + z.string(), // Accept ISO string from form input (part of TInput) + z.date() // Accept Date from existing data (TOutput) or programmatic input ]).transform(val => typeof val === 'string' ? new Date(val) : val ) }) +// TInput: { scheduled_for: string | Date } +// TOutput: { scheduled_for: Date } +// βœ… TInput is a superset of TOutput (accepts both string and Date) -// Works with string input +// Works with string input (new data) collection.insert({ id: "1", name: "Meeting", scheduled_for: "2024-12-31T15:00:00Z" // From form input }) -// Works with Date input +// Works with Date input (programmatic) collection.insert({ id: "2", name: "Workshop", - scheduled_for: new Date() // Programmatic + scheduled_for: new Date() }) -// Updates work - scheduled_for is already a Date +// Updates work - scheduled_for is already a Date, and TInput accepts Date collection.update("1", (draft) => { draft.name = "Updated Meeting" + // draft.scheduled_for is a Date and can be used or modified }) ``` @@ -706,23 +737,30 @@ const schema = z.object({ const processedData = expensiveParsingOperation(todo.data) ``` -### Use Union Types for Updates +### Use Union Types for Transformations (Essential) -Always use union types when transforming to different output types: +When your schema transforms data to a different type, you **must** use union types to ensure TInput is a superset of TOutput. This is not optional - updates will fail without it. ```typescript -// βœ… Good: Handles both input and existing data +// βœ… REQUIRED: TInput accepts both string (new data) and Date (existing data) const schema = z.object({ created_at: z.union([z.string(), z.date()]) .transform(val => typeof val === 'string' ? 
new Date(val) : val) }) +// TInput: { created_at: string | Date } +// TOutput: { created_at: Date } -// ❌ Bad: Will fail on updates +// ❌ WILL BREAK: Updates fail because draft contains Date but TInput only accepts string const schema = z.object({ created_at: z.string().transform(val => new Date(val)) }) +// TInput: { created_at: string } +// TOutput: { created_at: Date } +// Problem: collection.update() passes a Date to a schema expecting string! ``` +**Why this is required:** During `collection.update()`, the `draft` object contains TOutput data (already transformed). The schema must accept this data, which means TInput must be a superset of TOutput. + ### Validate at the Boundary Let the collection schema handle validation. Don't duplicate validation logic: