From ac638753f95b3d1f7a50af2c365eb69d6f36636c Mon Sep 17 00:00:00 2001 From: Kevin De Porre Date: Tue, 17 Feb 2026 12:04:52 +0100 Subject: [PATCH 1/5] test(db): add test for isUndefined filter on left-joined field Verifies that `.where(({ r }) => isUndefined(r?.payload))` after a leftJoin correctly filters out rows where the right side matched but the field has a defined value, rather than stripping the right side to undefined and keeping all rows. Co-Authored-By: Claude Opus 4.6 --- packages/db/tests/query/join.test.ts | 63 ++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) diff --git a/packages/db/tests/query/join.test.ts b/packages/db/tests/query/join.test.ts index d0918564e..2f3ce1492 100644 --- a/packages/db/tests/query/join.test.ts +++ b/packages/db/tests/query/join.test.ts @@ -1840,6 +1840,69 @@ function createJoinTests(autoIndex: `off` | `eager`): void { expect(result.join2!.value).toBe(1) expect(result.join2!.other).toBe(30) }) + + test(`where with isUndefined on right-side field should filter entire joined rows`, () => { + type Left = { + id: string + rightId: string | null + } + + type Right = { + id: string + payload: string | null | undefined + } + + const leftCollection = createCollection( + mockSyncCollectionOptions({ + id: `test-left-isundefined-field`, + getKey: (item) => item.id, + initialData: [ + { id: `l1`, rightId: `r1` }, + { id: `l2`, rightId: `r2` }, + { id: `l3`, rightId: `r3` }, + { id: `l4`, rightId: null }, + ], + autoIndex, + }), + ) + + const rightCollection = createCollection( + mockSyncCollectionOptions({ + id: `test-right-isundefined-field`, + getKey: (item) => item.id, + initialData: [ + { id: `r1`, payload: `ok` }, + { id: `r2`, payload: null }, + { id: `r3`, payload: undefined }, + ], + autoIndex, + }), + ) + + const lq = createLiveQueryCollection({ + startSync: true, + query: (q) => + q + .from({ l: leftCollection }) + .leftJoin( + { r: rightCollection }, + ({ l, r }) => eq(l.rightId, r.id), + ) + .where(({ r }) => isUndefined(r?.payload)) + .select(({ l, r }) => ({ leftId: l.id, right: r })), + }) + + const data = lq.toArray + + // l1 joins r1 (payload='ok') → payload is defined → exclude + // l2 joins r2 (payload=null) → payload is null, not undefined → exclude + // l3 joins r3 (payload=undefined) → payload is undefined → include + // l4 has no match (rightId=null) → right is undefined, so r?.payload is undefined → include + expect(data.sort((a, b) => a.leftId.localeCompare(b.leftId))).toEqual([ + { leftId: `l3`, right: { id: `r3`, payload: undefined } }, + { leftId: `l4`, right: undefined }, + ]) + }) } describe(`Query JOIN Operations`, () => { From cf9e586c217d19b221889b78c2541f74e1c66888 Mon Sep 17 00:00:00 2001 From: "autofix-ci[bot]" <114827586+autofix-ci[bot]@users.noreply.github.com> Date: Tue, 17 Feb 2026 11:17:04 +0000 Subject: [PATCH 2/5] ci: apply automated fixes --- packages/db/tests/query/join.test.ts | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/packages/db/tests/query/join.test.ts b/packages/db/tests/query/join.test.ts index 2f3ce1492..441be59bc 100644 --- a/packages/db/tests/query/join.test.ts +++ b/packages/db/tests/query/join.test.ts @@ -1884,10 +1884,7 @@ function createJoinTests(autoIndex: `off` | `eager`): void { query: (q) => q .from({ l: leftCollection }) - .leftJoin( - { r: rightCollection }, - ({ l, r }) => eq(l.rightId, r.id), - ) + .leftJoin({ r: rightCollection }, ({ l, r }) => eq(l.rightId, r.id)) .where(({ r }) => isUndefined(r?.payload)) .select(({ l, r }) => ({ leftId: l.id, right: r })), }) From a86559fde29a3271098e8516eacd60902d864634 Mon Sep 17 00:00:00 2001 From: Kevin De Porre Date: Tue, 17 Feb 2026 13:48:28 +0100 Subject: [PATCH 3/5] fix(db): don't push WHERE clauses to nullable side of outer joins The query optimizer was pushing single-source WHERE clauses into subqueries and sourceWhereClauses for the nullable side of outer joins (right side of LEFT, left side of RIGHT, both for FULL). This pre-filtered the data before the join, converting rows that should have been excluded by the WHERE into unmatched outer-join rows that then incorrectly survived the residual filter. The fix identifies nullable join sources and excludes them from predicate pushdown and index optimization extraction. Clauses for those sources are kept as post-join filters on the main pipeline. Co-Authored-By: Claude Opus 4.6 --- packages/db/src/query/optimizer.ts | 74 ++++++++++++++++--- packages/db/tests/query/indexes.test.ts | 34 ++++----- packages/db/tests/query/optimizer.test.ts | 89 +++++------------------ 3 files changed, 98 insertions(+), 99 deletions(-) diff --git a/packages/db/src/query/optimizer.ts b/packages/db/src/query/optimizer.ts index e1020c284..e962f296f 100644 --- a/packages/db/src/query/optimizer.ts +++ b/packages/db/src/query/optimizer.ts @@ -219,8 +219,11 @@ export function optimizeQuery(query: QueryIR): OptimizationResult { /** * Extracts collection-specific WHERE clauses from a query for index optimization. - * This analyzes the original query to identify WHERE clauses that can be pushed down - * to specific collections, but only for simple queries without joins. + * This analyzes the original query to identify single-source WHERE clauses that + * reference collection sources (not subqueries), including joined collections. + * + * For outer joins, clauses referencing the nullable side are excluded because + * using them to pre-filter collection data would change join semantics. * * @param query - The original QueryIR to analyze * @returns Map of source aliases to their WHERE clauses @@ -246,10 +249,19 @@ function extractSourceWhereClauses( // Group clauses by single-source vs multi-source const groupedClauses = groupWhereClauses(analyzedClauses) + // Determine which source aliases are on the nullable side of outer joins. + // WHERE clauses for these sources must not be used for index optimization + // because they should filter the final joined result, not the input data. + const nullableSources = getNullableJoinSources(query) + // Only include single-source clauses that reference collections directly + // and are not on the nullable side of an outer join for (const [sourceAlias, whereClause] of groupedClauses.singleSource) { // Check if this source alias corresponds to a collection reference - if (isCollectionReference(query, sourceAlias)) { + if ( + isCollectionReference(query, sourceAlias) && + !nullableSources.has(sourceAlias) + ) { sourceWhereClauses.set(sourceAlias, whereClause) } } @@ -283,6 +295,36 @@ function isCollectionReference(query: QueryIR, sourceAlias: string): boolean { return false } +/** + * Returns the set of source aliases that are on the nullable side of outer joins. + * + * For a LEFT join the joined (right) side is nullable. + * For a RIGHT join the main (left/from) side is nullable. + * For a FULL join both sides are nullable. + * + * WHERE clauses that reference only a nullable source must not be pushed down + * into that source's subquery or used for index optimization, because doing so + * changes the join semantics: rows that should be excluded by the WHERE become + * unmatched outer-join rows (with the nullable side set to undefined) and + * incorrectly survive residual filtering. + */ +function getNullableJoinSources(query: QueryIR): Set { + const nullable = new Set() + if (query.join) { + const mainAlias = query.from.alias + for (const join of query.join) { + const joinedAlias = join.from.alias + if (join.type === `left` || join.type === `full`) { + nullable.add(joinedAlias) + } + if (join.type === `right` || join.type === `full`) { + nullable.add(mainAlias) + } + } + } + return nullable +} + /** * Applies recursive predicate pushdown optimization. * @@ -635,10 +677,25 @@ function applyOptimizations( // Track which single-source clauses were actually optimized const actuallyOptimized = new Set() + // Determine which source aliases are on the nullable side of outer joins. + const nullableSources = getNullableJoinSources(query) + + // Build a filtered copy of singleSource that excludes nullable-side clauses. + // Pushing a WHERE clause into the nullable side's subquery pre-filters the + // data before the join, converting "matched but WHERE-excluded" rows into + // "unmatched" outer-join rows. These are indistinguishable from genuinely + // unmatched rows, so the residual WHERE cannot correct the result. + const pushableSingleSource = new Map>() + for (const [source, clause] of groupedClauses.singleSource) { + if (!nullableSources.has(source)) { + pushableSingleSource.set(source, clause) + } + } + // Optimize the main FROM clause and track what was optimized const optimizedFrom = optimizeFromWithTracking( query.from, - groupedClauses.singleSource, + pushableSingleSource, actuallyOptimized, ) @@ -648,7 +705,7 @@ function applyOptimizations( ...joinClause, from: optimizeFromWithTracking( joinClause.from, - groupedClauses.singleSource, + pushableSingleSource, actuallyOptimized, ), })) @@ -663,12 +720,7 @@ function applyOptimizations( } // Determine if we need residual clauses (when query has outer JOINs) - const hasOuterJoins = - query.join && - query.join.some( - (join) => - join.type === `left` || join.type === `right` || join.type === `full`, - ) + const hasOuterJoins = nullableSources.size > 0 // Add single-source clauses for (const [source, clause] of groupedClauses.singleSource) { diff --git a/packages/db/tests/query/indexes.test.ts b/packages/db/tests/query/indexes.test.ts index 9b221443c..a826861aa 100644 --- a/packages/db/tests/query/indexes.test.ts +++ b/packages/db/tests/query/indexes.test.ts @@ -679,13 +679,14 @@ describe(`Query Index Optimization`, () => { ], } - // Should use index optimization for both WHERE clauses - // since they each touch only a single source and both sources are indexed + // The WHERE clause on the non-nullable (left) side uses its index. + // The WHERE clause on the nullable (right) side of the LEFT JOIN is NOT + // pushed down to avoid changing join semantics, so the right side does a full scan. expectIndexUsage(combinedStats, { shouldUseIndex: true, - shouldUseFullScan: false, - indexCallCount: 2, // Both item.status='active' and other.status='active' can use indexes - fullScanCallCount: 0, + shouldUseFullScan: true, + indexCallCount: 1, // Only item.status='active' uses index (non-nullable side) + fullScanCallCount: 1, // other collection does full scan (nullable side) }) } finally { tracker1.restore() @@ -1177,21 +1178,18 @@ describe(`Query Index Optimization`, () => { { id: `1`, name: `Alice`, otherName: `Other Active Item` }, ]) - // We should have done a full scan of the right collection + // The right collection does a full scan (no index on status) expect(tracker2.stats.queriesExecuted).toEqual([ { type: `fullScan`, }, ]) - // We should have done an index lookup on the 1st collection to find active items + // In a RIGHT join, the left (from) side is nullable. The WHERE clause + // eq(item.status, 'active') is NOT pushed down to avoid changing join + // semantics, so the left collection does NOT do an index lookup for status. + // It only does the index lookup for the join key (id) used by lazy loading. expect(tracker1.stats.queriesExecuted).toEqual([ - { - field: `status`, - operation: `eq`, - type: `index`, - value: `active`, - }, { type: `index`, operation: `in`, @@ -1281,14 +1279,12 @@ describe(`Query Index Optimization`, () => { }, ]) - // We should have done an index lookup on the left collection to find active items - // because it has an index on the join key + // In a RIGHT join, the left (from) side is nullable. The WHERE clause + // eq(item.status, 'active') is NOT pushed down to avoid changing join + // semantics, so the left collection does a full scan. expect(tracker1.stats.queriesExecuted).toEqual([ { - type: `index`, - operation: `eq`, - field: `status`, - value: `active`, + type: `fullScan`, }, ]) } finally { diff --git a/packages/db/tests/query/optimizer.test.ts b/packages/db/tests/query/optimizer.test.ts index 77ff8cbed..6b0912044 100644 --- a/packages/db/tests/query/optimizer.test.ts +++ b/packages/db/tests/query/optimizer.test.ts @@ -1649,35 +1649,18 @@ describe(`Query Optimizer`, () => { const { optimizedQuery } = optimizeQuery(query) // The WHERE clause should remain in the main query to preserve LEFT JOIN semantics - // It should NOT be completely moved to the subquery + // It must NOT be pushed down to the nullable (right) side because that would + // pre-filter the right data, converting excluded matches into unmatched rows expect(optimizedQuery.where).toHaveLength(1) - expect(optimizedQuery.where![0]).toEqual({ - expression: createEq( + expect(optimizedQuery.where![0]).toEqual( + createEq( createPropRef(`teamMember`, `user_id`), createValue(100), ), - residual: true, - }) + ) - // If the optimizer creates a subquery for teamMember, the WHERE clause should also be copied there - // but a residual copy must remain in the main query - if ( - optimizedQuery.join && - optimizedQuery.join[0]?.from.type === `queryRef` - ) { - const teamMemberSubquery = optimizedQuery.join[0].from.query - // The subquery may have the WHERE clause for optimization - if (teamMemberSubquery.where && teamMemberSubquery.where.length > 0) { - // But the main query MUST still have it to preserve semantics - expect(optimizedQuery.where).toContainEqual({ - expression: createEq( - createPropRef(`teamMember`, `user_id`), - createValue(100), - ), - residual: true, - }) - } - } + // The optimizer should NOT create a subquery for the nullable side + expect(optimizedQuery.join![0]!.from.type).toBe(`collectionRef`) }) test(`should preserve WHERE clause semantics when pushing down to RIGHT JOIN`, () => { @@ -1715,32 +1698,17 @@ describe(`Query Optimizer`, () => { const { optimizedQuery } = optimizeQuery(query) // The WHERE clause should remain in the main query to preserve RIGHT JOIN semantics - // It should NOT be completely moved to the subquery + // It must NOT be pushed down to the nullable (left/from) side expect(optimizedQuery.where).toHaveLength(1) - expect(optimizedQuery.where![0]).toEqual({ - expression: createEq( + expect(optimizedQuery.where![0]).toEqual( + createEq( createPropRef(`user`, `department_id`), createValue(1), ), - residual: true, - }) + ) - // If the optimizer creates a subquery for users, the WHERE clause should also be copied there - // but a residual copy must remain in the main query - if (optimizedQuery.from.type === `queryRef`) { - const userSubquery = optimizedQuery.from.query - // The subquery may have the WHERE clause for optimization - if (userSubquery.where && userSubquery.where.length > 0) { - // But the main query MUST still have it to preserve semantics - expect(optimizedQuery.where).toContainEqual({ - expression: createEq( - createPropRef(`user`, `department_id`), - createValue(1), - ), - residual: true, - }) - } - } + // The optimizer should NOT create a subquery for the nullable side (from) + expect(optimizedQuery.from.type).toBe(`collectionRef`) }) test(`should preserve WHERE clause semantics when pushing down to FULL JOIN`, () => { @@ -1778,35 +1746,18 @@ describe(`Query Optimizer`, () => { const { optimizedQuery } = optimizeQuery(query) // The WHERE clause should remain in the main query to preserve FULL JOIN semantics - // It should NOT be completely moved to the subquery + // It must NOT be pushed down to any nullable side expect(optimizedQuery.where).toHaveLength(1) - expect(optimizedQuery.where![0]).toEqual({ - expression: createGt( + expect(optimizedQuery.where![0]).toEqual( + createGt( createPropRef(`payment`, `amount`), createValue(100), ), - residual: true, - }) + ) - // If the optimizer creates a subquery for payments, the WHERE clause should also be copied there - // but a residual copy must remain in the main query - if ( - optimizedQuery.join && - optimizedQuery.join[0]?.from.type === `queryRef` - ) { - const paymentSubquery = optimizedQuery.join[0].from.query - // The subquery may have the WHERE clause for optimization - if (paymentSubquery.where && paymentSubquery.where.length > 0) { - // But the main query MUST still have it to preserve semantics - expect(optimizedQuery.where).toContainEqual({ - expression: createGt( - createPropRef(`payment`, `amount`), - createValue(100), - ), - residual: true, - }) - } - } + // The optimizer should NOT create subqueries for nullable sides + expect(optimizedQuery.from.type).toBe(`collectionRef`) + expect(optimizedQuery.join![0]!.from.type).toBe(`collectionRef`) }) test(`should allow WHERE clause pushdown for INNER JOIN (semantics preserved)`, () => { From 74f38d3e34efafc5e6ec7ebff722c5745bd0d92b Mon Sep 17 00:00:00 2001 From: Kevin De Porre Date: Tue, 17 Feb 2026 13:52:16 +0100 Subject: [PATCH 4/5] chore: add changeset for left join WHERE fix Co-Authored-By: Claude Opus 4.6 --- .changeset/fix-left-join-where-filter.md | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 .changeset/fix-left-join-where-filter.md diff --git a/.changeset/fix-left-join-where-filter.md b/.changeset/fix-left-join-where-filter.md new file mode 100644 index 000000000..1a03f0797 --- /dev/null +++ b/.changeset/fix-left-join-where-filter.md @@ -0,0 +1,7 @@ +--- +'@tanstack/db': patch +--- + +fix(db): don't push WHERE clauses to nullable side of outer joins + +The query optimizer incorrectly pushed single-source WHERE clauses into subqueries and collection index optimization for the nullable side of outer joins. This pre-filtered the data before the join, converting rows that should have been excluded by the WHERE into unmatched outer-join rows that incorrectly survived the residual filter. From 31340196ed9d29bd77a4ec1ff21767b34ba8ad6a Mon Sep 17 00:00:00 2001 From: "autofix-ci[bot]" <114827586+autofix-ci[bot]@users.noreply.github.com> Date: Tue, 17 Feb 2026 12:53:34 +0000 Subject: [PATCH 5/5] ci: apply automated fixes --- packages/db/tests/query/optimizer.test.ts | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/packages/db/tests/query/optimizer.test.ts b/packages/db/tests/query/optimizer.test.ts index 6b0912044..0952ac2ba 100644 --- a/packages/db/tests/query/optimizer.test.ts +++ b/packages/db/tests/query/optimizer.test.ts @@ -1653,10 +1653,7 @@ describe(`Query Optimizer`, () => { // pre-filter the right data, converting excluded matches into unmatched rows expect(optimizedQuery.where).toHaveLength(1) expect(optimizedQuery.where![0]).toEqual( - createEq( - createPropRef(`teamMember`, `user_id`), - createValue(100), - ), + createEq(createPropRef(`teamMember`, `user_id`), createValue(100)), ) // The optimizer should NOT create a subquery for the nullable side @@ -1701,10 +1698,7 @@ describe(`Query Optimizer`, () => { // It must NOT be pushed down to the nullable (left/from) side expect(optimizedQuery.where).toHaveLength(1) expect(optimizedQuery.where![0]).toEqual( - createEq( - createPropRef(`user`, `department_id`), - createValue(1), - ), + createEq(createPropRef(`user`, `department_id`), createValue(1)), ) // The optimizer should NOT create a subquery for the nullable side (from) @@ -1749,10 +1743,7 @@ describe(`Query Optimizer`, () => { // It must NOT be pushed down to any nullable side expect(optimizedQuery.where).toHaveLength(1) expect(optimizedQuery.where![0]).toEqual( - createGt( - createPropRef(`payment`, `amount`), - createValue(100), - ), + createGt(createPropRef(`payment`, `amount`), createValue(100)), ) // The optimizer should NOT create subqueries for nullable sides