Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/slimy-lizards-kiss.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"@tanstack/db": patch
---

Fix query optimizer to preserve outer join semantics by keeping residual WHERE clauses when pushing predicates to subqueries.
8 changes: 5 additions & 3 deletions packages/db/src/query/compiler/group-by.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { filter, groupBy, groupByOperators, map } from "@tanstack/db-ivm"
import { Func, PropRef } from "../ir.js"
import { Func, PropRef, getHavingExpression } from "../ir.js"
import {
AggregateFunctionNotInSelectError,
NonAggregateExpressionNotInGroupByError,
Expand Down Expand Up @@ -129,8 +129,9 @@ export function processGroupBy(
// Apply HAVING clauses if present
if (havingClauses && havingClauses.length > 0) {
for (const havingClause of havingClauses) {
const havingExpression = getHavingExpression(havingClause)
const transformedHavingClause = transformHavingClause(
havingClause,
havingExpression,
selectClause || {}
)
const compiledHaving = compileExpression(transformedHavingClause)
Expand Down Expand Up @@ -263,8 +264,9 @@ export function processGroupBy(
// Apply HAVING clauses if present
if (havingClauses && havingClauses.length > 0) {
for (const havingClause of havingClauses) {
const havingExpression = getHavingExpression(havingClause)
const transformedHavingClause = transformHavingClause(
havingClause,
havingExpression,
selectClause || {}
)
const compiledHaving = compileExpression(transformedHavingClause)
Expand Down
5 changes: 3 additions & 2 deletions packages/db/src/query/compiler/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ import {
LimitOffsetRequireOrderByError,
UnsupportedFromTypeError,
} from "../../errors.js"
import { PropRef } from "../ir.js"
import { PropRef, getWhereExpression } from "../ir.js"
import { compileExpression } from "./evaluators.js"
import { processJoins } from "./joins.js"
import { processGroupBy } from "./group-by.js"
Expand Down Expand Up @@ -131,7 +131,8 @@ export function compileQuery(
if (query.where && query.where.length > 0) {
// Apply each WHERE condition as a filter (they are ANDed together)
for (const where of query.where) {
const compiledWhere = compileExpression(where)
const whereExpression = getWhereExpression(where)
const compiledWhere = compileExpression(whereExpression)
pipeline = pipeline.pipe(
filter(([_key, namespacedRow]) => {
return compiledWhere(namespacedRow)
Expand Down
49 changes: 48 additions & 1 deletion packages/db/src/query/ir.ts
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,9 @@ export interface JoinClause {
right: BasicExpression
}

export type Where = BasicExpression<boolean>
export type Where =
| BasicExpression<boolean>
| { expression: BasicExpression<boolean>; residual?: boolean }

export type GroupBy = Array<BasicExpression>

Expand Down Expand Up @@ -128,3 +130,48 @@ export class Aggregate<T = any> extends BaseExpression<T> {
super()
}
}

/**
* Helper functions for working with Where clauses
*/

/**
 * Unwrap a Where clause down to its boolean expression.
 *
 * A Where clause is either a bare expression or a wrapper object that
 * carries the expression under an `expression` property (optionally
 * flagged as residual). The wrapper form yields its inner expression;
 * the bare form is returned unchanged.
 *
 * @param where - The Where clause to unwrap
 * @returns The underlying boolean expression
 */
export function getWhereExpression(where: Where): BasicExpression<boolean> {
  // Wrapper form: the presence of an `expression` property is the marker
  if (typeof where === `object` && `expression` in where) {
    return where.expression
  }

  // Bare form: the clause already is the expression
  return where
}

/**
 * Unwrap a HAVING clause down to its expression.
 *
 * Mirrors the Where unwrapping: a wrapper object carrying an `expression`
 * property yields that inner expression, anything else is returned as-is.
 * The return type is wider than for WHERE because HAVING clauses can
 * contain aggregates.
 *
 * @param having - The HAVING clause to unwrap
 * @returns The underlying expression or aggregate
 */
export function getHavingExpression(
  having: Having
): BasicExpression | Aggregate {
  // Wrapper form: hand back the inner expression
  if (typeof having === `object` && `expression` in having) {
    return having.expression
  }

  // Bare form: the clause already is the expression
  return having
}

/**
 * Tell whether a Where clause is flagged as residual.
 *
 * Only the wrapper form (an object with an `expression` property) can be
 * residual, and only when its `residual` flag is exactly `true`. Bare
 * expressions and wrappers with a missing or false flag are not residual.
 *
 * @param where - The Where clause to inspect
 * @returns `true` only for a wrapper whose `residual` flag is `true`
 */
export function isResidualWhere(where: Where): boolean {
  // Bare expressions carry no residual flag at all
  if (typeof where !== `object` || !(`expression` in where)) {
    return false
  }

  return where.residual === true
}

/**
 * Wrap an expression as a residual Where clause.
 *
 * @param expression - The boolean expression to wrap
 * @returns A wrapper-form Where clause holding `expression` with its
 *   `residual` flag set to `true`
 */
export function createResidualWhere(
  expression: BasicExpression<boolean>
): Where {
  const residualClause: Where = { expression, residual: true }
  return residualClause
}
84 changes: 66 additions & 18 deletions packages/db/src/query/optimizer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,11 @@
* - **Ordering + Limits**: ORDER BY combined with LIMIT/OFFSET (would change result set)
* - **Functional Operations**: fnSelect, fnWhere, fnHaving (potential side effects)
*
* ### Residual WHERE Clauses
* For outer joins (LEFT, RIGHT, FULL), WHERE clauses are copied to subqueries for optimization
* but also kept as "residual" clauses in the main query to preserve semantics. This ensures
* that NULL values from outer joins are properly filtered according to SQL standards.
*
* The optimizer tracks which clauses were actually optimized and only removes those from the
* main query. Subquery reuse is handled safely through immutable query copies.
*
Expand Down Expand Up @@ -121,9 +126,12 @@ import {
CollectionRef as CollectionRefClass,
Func,
QueryRef as QueryRefClass,
createResidualWhere,
getWhereExpression,
isResidualWhere,
} from "./ir.js"
import { isConvertibleToCollectionFilter } from "./compiler/expressions.js"
import type { BasicExpression, From, QueryIR } from "./ir.js"
import type { BasicExpression, From, QueryIR, Where } from "./ir.js"

/**
* Represents a WHERE clause after source analysis
Expand Down Expand Up @@ -325,8 +333,13 @@ function applySingleLevelOptimization(query: QueryIR): QueryIR {
return query
}

// Filter out residual WHERE clauses to prevent them from being optimized again
const nonResidualWhereClauses = query.where.filter(
(where) => !isResidualWhere(where)
)

// Step 1: Split all AND clauses at the root level for granular optimization
const splitWhereClauses = splitAndClauses(query.where)
const splitWhereClauses = splitAndClauses(nonResidualWhereClauses)

// Step 2: Analyze each WHERE clause to determine which sources it touches
const analyzedClauses = splitWhereClauses.map((clause) =>
Expand All @@ -337,7 +350,20 @@ function applySingleLevelOptimization(query: QueryIR): QueryIR {
const groupedClauses = groupWhereClauses(analyzedClauses)

// Step 4: Apply optimizations by lifting single-source clauses into subqueries
return applyOptimizations(query, groupedClauses)
const optimizedQuery = applyOptimizations(query, groupedClauses)

// Add back any residual WHERE clauses that were filtered out
const residualWhereClauses = query.where.filter((where) =>
isResidualWhere(where)
)
if (residualWhereClauses.length > 0) {
optimizedQuery.where = [
...(optimizedQuery.where || []),
...residualWhereClauses,
]
}

return optimizedQuery
}

/**
Expand Down Expand Up @@ -424,26 +450,35 @@ function isRedundantSubquery(query: QueryIR): boolean {
* ```
*/
function splitAndClauses(
whereClauses: Array<BasicExpression<boolean>>
whereClauses: Array<Where>
): Array<BasicExpression<boolean>> {
const result: Array<BasicExpression<boolean>> = []

for (const clause of whereClauses) {
if (clause.type === `func` && clause.name === `and`) {
// Recursively split nested AND clauses to handle complex expressions
const splitArgs = splitAndClauses(
clause.args as Array<BasicExpression<boolean>>
)
result.push(...splitArgs)
} else {
// Preserve non-AND clauses as-is (including OR clauses)
result.push(clause)
}
for (const whereClause of whereClauses) {
const clause = getWhereExpression(whereClause)
result.push(...splitAndClausesRecursive(clause))
}

return result
}

/**
 * Flatten a boolean expression into the list of its AND-ed parts.
 *
 * A `func` expression named `and` is expanded by recursively flattening each
 * of its arguments, in order; every other expression (including OR clauses)
 * is kept whole as a single-element list.
 *
 * @param clause - The expression to flatten
 * @returns The non-AND expressions found, in left-to-right order
 */
function splitAndClausesRecursive(
  clause: BasicExpression<boolean>
): Array<BasicExpression<boolean>> {
  // Anything that is not an AND is a leaf and is preserved as-is
  if (clause.type !== `func` || clause.name !== `and`) {
    return [clause]
  }

  // AND node: expand each argument and concatenate the results
  const operands = clause.args as Array<BasicExpression<boolean>>
  return operands.flatMap((operand) => splitAndClausesRecursive(operand))
}

/**
* Step 2: Analyze which table sources a WHERE clause touches.
*
Expand Down Expand Up @@ -588,19 +623,32 @@ function applyOptimizations(
}))
: undefined

// Build the remaining WHERE clauses: multi-source + any single-source that weren't optimized
const remainingWhereClauses: Array<BasicExpression<boolean>> = []
// Build the remaining WHERE clauses: multi-source + residual single-source clauses
const remainingWhereClauses: Array<Where> = []

// Add multi-source clauses
if (groupedClauses.multiSource) {
remainingWhereClauses.push(groupedClauses.multiSource)
}

// Add single-source clauses that weren't actually optimized
// Determine if we need residual clauses (when query has outer JOINs)
const hasOuterJoins =
query.join &&
query.join.some(
(join) =>
join.type === `left` || join.type === `right` || join.type === `full`
)

// Add single-source clauses
for (const [source, clause] of groupedClauses.singleSource) {
if (!actuallyOptimized.has(source)) {
// Wasn't optimized at all - keep as regular WHERE clause
remainingWhereClauses.push(clause)
} else if (hasOuterJoins) {
// Was optimized AND query has outer JOINs - keep as residual WHERE clause
remainingWhereClauses.push(createResidualWhere(clause))
}
// If optimized and no outer JOINs - don't keep (original behavior)
}

// Create a completely new query object to ensure immutability
Expand Down
16 changes: 8 additions & 8 deletions packages/db/tests/query/indexes.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -631,7 +631,7 @@ describe(`Query Index Optimization`, () => {
write({
type: `insert`,
value: {
id: `other1`,
id: `1`, // Matches Alice from main collection
name: `Other Active Item`,
age: 40,
status: `active`,
Expand All @@ -641,7 +641,7 @@ describe(`Query Index Optimization`, () => {
write({
type: `insert`,
value: {
id: `other2`,
id: `2`, // Matches Bob from main collection
name: `Other Inactive Item`,
age: 35,
status: `inactive`,
Expand Down Expand Up @@ -970,11 +970,11 @@ describe(`Query Index Optimization`, () => {

await liveQuery.stateWhenReady()

// Should include all results from the first collection
// Should only include results where both sides match the WHERE condition
// Charlie and Eve are filtered out because they have no matching 'other' records
// and the WHERE clause requires other.status = 'active' (can't be NULL)
expect(liveQuery.toArray).toEqual([
{ id: `1`, name: `Alice`, otherName: `Other Active Item` },
{ id: `3`, name: `Charlie` },
{ id: `5`, name: `Eve` },
])

// Combine stats from both collections
Expand Down Expand Up @@ -1100,11 +1100,11 @@ describe(`Query Index Optimization`, () => {

await liveQuery.stateWhenReady()

// Should have found results where both items are active
// Should only include results where both sides match the WHERE condition
// Charlie and Eve are filtered out because they have no matching 'other' records
// and the WHERE clause requires other.status = 'active' (can't be NULL)
expect(liveQuery.toArray).toEqual([
{ id: `1`, name: `Alice`, otherName: `Other Active Item` },
{ id: `3`, name: `Charlie` },
{ id: `5`, name: `Eve` },
])

// We should have done an index lookup on the left collection to find active items
Expand Down
Loading