From 85413dd69b8427f84b2641d399e8101bcb9631a8 Mon Sep 17 00:00:00 2001 From: Sam Willis Date: Mon, 29 Sep 2025 11:32:33 +0100 Subject: [PATCH 1/3] add test --- packages/db/tests/query/join.test.ts | 47 +++++++++++++++++++++++++++- 1 file changed, 46 insertions(+), 1 deletion(-) diff --git a/packages/db/tests/query/join.test.ts b/packages/db/tests/query/join.test.ts index 9b2d3ef24..be9184609 100644 --- a/packages/db/tests/query/join.test.ts +++ b/packages/db/tests/query/join.test.ts @@ -1,5 +1,9 @@ import { beforeEach, describe, expect, test } from "vitest" -import { createLiveQueryCollection, eq } from "../../src/query/index.js" +import { + createLiveQueryCollection, + eq, + isUndefined, +} from "../../src/query/index.js" import { createCollection } from "../../src/collection/index.js" import { mockSyncCollectionOptions } from "../utils.js" @@ -1405,6 +1409,47 @@ function createJoinTests(autoIndex: `off` | `eager`): void { `Invalid join condition: both expressions refer to the same table "user"` ) }) + + test(`should not push down isUndefined(namespace) to subquery`, () => { + const usersCollection = createUsersCollection() + const specialUsersCollection = createCollection( + mockSyncCollectionOptions({ + id: `special-users`, + getKey: (user) => user.id, + initialData: [{ id: 1, special: true }], + }) + ) + + const joinQuery = createLiveQueryCollection({ + startSync: true, + query: (q) => + q + .from({ user: usersCollection }) + .leftJoin( + { special: specialUsersCollection }, + ({ user, special }) => eq(user.id, special.id) + ) + .where(({ special }) => isUndefined(special)), + }) + + // Should return users that don't have a matching special user + // This should be users with IDs 2, 3, 4 (since only user 1 has a special record) + const results = joinQuery.toArray + expect(results).toHaveLength(3) + + // All results should have special as undefined + for (const row of results) { + expect(row.special).toBeUndefined() + } + + // Should not include user 1 (Alice) since she has a special record + const aliceResult = results.find((r) => r.user.id === 1) + expect(aliceResult).toBeUndefined() + + // Should include users 2, 3, 4 (Bob, Charlie, Dave) + const userIds = results.map((r) => r.user.id).sort() + expect(userIds).toEqual([2, 3, 4]) + }) }) } From ef18a7155fb9815cdcbaa66a1eeee8d900ace22f Mon Sep 17 00:00:00 2001 From: Sam Willis Date: Mon, 29 Sep 2025 11:37:33 +0100 Subject: [PATCH 2/3] the fix --- packages/db/src/query/optimizer.ts | 34 ++++++++++++++++++++++++------ 1 file changed, 28 insertions(+), 6 deletions(-) diff --git a/packages/db/src/query/optimizer.ts b/packages/db/src/query/optimizer.ts index e9f35fefb..55b3c083b 100644 --- a/packages/db/src/query/optimizer.ts +++ b/packages/db/src/query/optimizer.ts @@ -142,6 +142,8 @@ export interface AnalyzedWhereClause { expression: BasicExpression /** Set of table/source aliases that this WHERE clause touches */ touchedSources: Set + /** Whether this clause contains namespace-only references that prevent pushdown */ + hasNamespaceOnlyRef: boolean } /** @@ -486,19 +488,31 @@ function splitAndClausesRecursive( * This determines whether a clause can be pushed down to a specific table * or must remain in the main query (for multi-source clauses like join conditions). * + * Special handling for namespace-only references in outer joins: + * WHERE clauses that reference only a table namespace (e.g., isUndefined(special), eq(special, value)) + * rather than specific properties (e.g., isUndefined(special.id), eq(special.id, value)) are treated as + * multi-source to prevent incorrect predicate pushdown that would change join semantics. + * * @param clause - The WHERE expression to analyze * @returns Analysis result with the expression and touched source aliases * * @example * ```typescript - * // eq(users.department_id, 1) -> touches ['users'] - * // eq(users.id, posts.user_id) -> touches ['users', 'posts'] + * // eq(users.department_id, 1) -> touches ['users'], hasNamespaceOnlyRef: false + * // eq(users.id, posts.user_id) -> touches ['users', 'posts'], hasNamespaceOnlyRef: false + * // isUndefined(special) -> touches ['special'], hasNamespaceOnlyRef: true (prevents pushdown) + * // eq(special, someValue) -> touches ['special'], hasNamespaceOnlyRef: true (prevents pushdown) + * // isUndefined(special.id) -> touches ['special'], hasNamespaceOnlyRef: false (allows pushdown) + * // eq(special.id, 5) -> touches ['special'], hasNamespaceOnlyRef: false (allows pushdown) * ``` */ function analyzeWhereClause( clause: BasicExpression ): AnalyzedWhereClause { + // Track which table aliases this WHERE clause touches const touchedSources = new Set() + // Track whether this clause contains namespace-only references that prevent pushdown + let hasNamespaceOnlyRef = false /** * Recursively collect all table aliases referenced in an expression @@ -511,6 +525,13 @@ function analyzeWhereClause( const firstElement = expr.path[0] if (firstElement) { touchedSources.add(firstElement) + + // If the path has only one element (just the namespace), + // this is a namespace-only reference that should not be pushed down + // This applies to ANY function, not just existence-checking functions + if (expr.path.length === 1) { + hasNamespaceOnlyRef = true + } } } break @@ -537,6 +558,7 @@ function analyzeWhereClause( return { expression: clause, touchedSources, + hasNamespaceOnlyRef, } } @@ -557,15 +579,15 @@ function groupWhereClauses( // Categorize each clause based on how many sources it touches for (const clause of analyzedClauses) { - if (clause.touchedSources.size === 1) { - // Single source clause - can be optimized + if (clause.touchedSources.size === 1 && !clause.hasNamespaceOnlyRef) { + // Single source clause without namespace-only references - can be optimized const source = Array.from(clause.touchedSources)[0]! if (!singleSource.has(source)) { singleSource.set(source, []) } singleSource.get(source)!.push(clause.expression) - } else if (clause.touchedSources.size > 1) { - // Multi-source clause - must stay in main query + } else if (clause.touchedSources.size > 1 || clause.hasNamespaceOnlyRef) { + // Multi-source clause or namespace-only reference - must stay in main query multiSource.push(clause.expression) } // Skip clauses that touch no sources (constants) - they don't need optimization From 6c3d586f315715d5bb151e2c780a8a1cf3b4a1e3 Mon Sep 17 00:00:00 2001 From: Sam Willis Date: Mon, 29 Sep 2025 11:37:44 +0100 Subject: [PATCH 3/3] changeset --- .changeset/curvy-mangos-yell.md | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 .changeset/curvy-mangos-yell.md diff --git a/.changeset/curvy-mangos-yell.md b/.changeset/curvy-mangos-yell.md new file mode 100644 index 000000000..a813a6187 --- /dev/null +++ b/.changeset/curvy-mangos-yell.md @@ -0,0 +1,5 @@ +--- +"@tanstack/db": patch +--- + +Prevent pushing down of where clauses that only touch the namespace of a source, rather than a prop on that namespace. This ensures that the semantics of the query are maintained for things such as `isUndefined(namespace)` after a join.