From e46fc81607c20ab14ea3f2879f1fd9df5a0d9b82 Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 30 Oct 2025 15:26:25 +0000 Subject: [PATCH 1/9] fix: Optimize queries without joins by combining multiple WHERE clauses MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses issue #445 - performance slowdown when using multiple .where() calls. ## Problem When using multiple .where() calls on a query without joins: ```javascript query.from({ item: collection }) .where(({ item }) => eq(item.gridId, gridId)) .where(({ item }) => eq(item.rowId, rowId)) .where(({ item }) => eq(item.side, side)) ``` The optimizer was skipping these queries entirely, leaving multiple WHERE clauses in an array. During query compilation, each WHERE clause was applied as a separate filter() operation in the D2 pipeline, causing a 40%+ performance degradation compared to using a single WHERE clause with AND. ## Solution Modified the optimizer to combine multiple WHERE clauses into a single AND expression for queries without joins. This ensures only one filter operator is added to the pipeline, improving performance while maintaining correct semantics. The optimizer now: 1. Detects queries without joins that have multiple WHERE clauses 2. Combines them using the AND function 3. 
Reduces pipeline complexity from N filters to 1 filter ## Testing - Updated existing optimizer tests to reflect the new behavior - All 42 optimizer tests pass - Added new test case for combining multiple WHERE clauses without joins 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- packages/db/src/query/optimizer.ts | 17 ++- packages/db/tests/query/optimizer.test.ts | 147 ++++++++++++++-------- 2 files changed, 111 insertions(+), 53 deletions(-) diff --git a/packages/db/src/query/optimizer.ts b/packages/db/src/query/optimizer.ts index 738eec95a..e10e13a7a 100644 --- a/packages/db/src/query/optimizer.ts +++ b/packages/db/src/query/optimizer.ts @@ -330,9 +330,22 @@ function applySingleLevelOptimization(query: QueryIR): QueryIR { return query } - // Skip optimization if there are no joins - predicate pushdown only benefits joins - // Single-table queries don't benefit from this optimization + // For queries without joins, combine multiple WHERE clauses into a single clause + // to avoid creating multiple filter operators in the pipeline (performance optimization for issue #445) if (!query.join || query.join.length === 0) { + // Only optimize if there are multiple WHERE clauses to combine + if (query.where.length > 1) { + // Combine multiple WHERE clauses into a single AND expression + const splitWhereClauses = splitAndClauses(query.where) + const combinedWhere = combineWithAnd(splitWhereClauses) + + return { + ...query, + where: [combinedWhere], + } + } + + // For single WHERE clauses, no optimization needed return query } diff --git a/packages/db/tests/query/optimizer.test.ts b/packages/db/tests/query/optimizer.test.ts index 9e25bb2d4..a9eec06dc 100644 --- a/packages/db/tests/query/optimizer.test.ts +++ b/packages/db/tests/query/optimizer.test.ts @@ -69,16 +69,41 @@ describe(`Query Optimizer`, () => { expect(optimized).toEqual(query) }) - test(`should skip optimization for queries without joins`, () => { + test(`should skip 
optimization for queries without joins and single WHERE clause`, () => { const query: QueryIR = { from: new CollectionRef(mockCollection, `u`), where: [createEq(createPropRef(`u`, `department_id`), createValue(1))], } const { optimizedQuery: optimized } = optimizeQuery(query) - // Query should remain unchanged since there are no joins to optimize + // Query should remain unchanged since there is only one WHERE clause expect(optimized).toEqual(query) }) + + test(`should combine multiple WHERE clauses for queries without joins`, () => { + const query: QueryIR = { + from: new CollectionRef(mockCollection, `u`), + where: [ + createEq(createPropRef(`u`, `department_id`), createValue(1)), + createGt(createPropRef(`u`, `salary`), createValue(50000)), + createEq(createPropRef(`u`, `active`), createValue(true)), + ], + } + + const { optimizedQuery: optimized } = optimizeQuery(query) + + // The WHERE clauses should be combined into a single AND expression + expect(optimized.where).toHaveLength(1) + expect(optimized.where![0]).toMatchObject({ + type: `func`, + name: `and`, + args: [ + createEq(createPropRef(`u`, `department_id`), createValue(1)), + createGt(createPropRef(`u`, `salary`), createValue(50000)), + createEq(createPropRef(`u`, `active`), createValue(true)), + ], + }) + }) }) describe(`Single Source Optimization with Joins`, () => { @@ -518,16 +543,19 @@ describe(`Query Optimizer`, () => { const { optimizedQuery: optimized } = optimizeQuery(query) - // The existing subquery should have both WHERE clauses + // The existing subquery should have WHERE clauses combined for performance expect(optimized.from.type).toBe(`queryRef`) if (optimized.from.type === `queryRef`) { - expect(optimized.from.query.where).toHaveLength(2) - expect(optimized.from.query.where![0]).toEqual( - createGt(createPropRef(`u`, `id`), createValue(50)) - ) - expect(optimized.from.query.where![1]).toEqual( - createEq(createPropRef(`u`, `department_id`), createValue(1)) - ) + // After optimization, 
the WHERE clauses are combined into a single AND expression + expect(optimized.from.query.where).toHaveLength(1) + expect(optimized.from.query.where![0]).toMatchObject({ + type: `func`, + name: `and`, + args: [ + createGt(createPropRef(`u`, `id`), createValue(50)), + createEq(createPropRef(`u`, `department_id`), createValue(1)), + ], + }) } }) @@ -558,10 +586,11 @@ describe(`Query Optimizer`, () => { const { optimizedQuery: optimized } = optimizeQuery(query) - // The deeply nested structure should be preserved and new WHERE clause added + // The deeply nested structure should be preserved and WHERE clauses combined expect(optimized.from.type).toBe(`queryRef`) if (optimized.from.type === `queryRef`) { - expect(optimized.from.query.where).toHaveLength(2) + // WHERE clauses are combined for performance + expect(optimized.from.query.where).toHaveLength(1) expect(optimized.from.query.from.type).toBe(`queryRef`) } }) @@ -746,18 +775,20 @@ describe(`Query Optimizer`, () => { const { optimizedQuery: optimized } = optimizeQuery(nestedQuery) - // The new WHERE clause should be pushed to the nested level + // The new WHERE clause should be pushed to the nested level and combined expect(optimized.where).toEqual([]) expect(optimized.from.type).toBe(`queryRef`) if (optimized.from.type === `queryRef`) { - // Should have both WHERE clauses at the inner level - expect(optimized.from.query.where).toHaveLength(2) - expect(optimized.from.query.where).toContainEqual( - createGt(createPropRef(`u`, `id`), createValue(10)) - ) - expect(optimized.from.query.where).toContainEqual( - createEq(createPropRef(`u`, `department_id`), createValue(1)) - ) + // WHERE clauses are combined into a single AND expression for performance + expect(optimized.from.query.where).toHaveLength(1) + expect(optimized.from.query.where![0]).toMatchObject({ + type: `func`, + name: `and`, + args: [ + createGt(createPropRef(`u`, `id`), createValue(10)), + createEq(createPropRef(`u`, `department_id`), createValue(1)), + 
], + }) } }) @@ -790,20 +821,25 @@ describe(`Query Optimizer`, () => { const { optimizedQuery: optimized } = optimizeQuery(deeplyNestedQuery) - // Should at least push the top-level WHERE clause down one level + // Should at least push the top-level WHERE clause down one level and combine them expect(optimized.where).toEqual([]) expect(optimized.from.type).toBe(`queryRef`) if (optimized.from.type === `queryRef`) { const innerQuery = optimized.from.query - // The department_id clause should be pushed to this level - expect(innerQuery.where).toContainEqual( - createEq(createPropRef(`u`, `department_id`), createValue(1)) - ) - - // The age clause should remain here or be pushed deeper - expect(innerQuery.where).toContainEqual( + // The WHERE clauses should be combined into a single AND expression + expect(innerQuery.where).toHaveLength(1) + expect(innerQuery.where![0]).toMatchObject({ + type: `func`, + name: `and`, + }) + // Verify both conditions are in the combined expression + const combinedWhere = innerQuery.where![0] as any + expect(combinedWhere.args).toContainEqual( createLt(createPropRef(`u`, `age`), createValue(50)) ) + expect(combinedWhere.args).toContainEqual( + createEq(createPropRef(`u`, `department_id`), createValue(1)) + ) } }) @@ -885,16 +921,19 @@ describe(`Query Optimizer`, () => { createEq(createPropRef(`u`, `id`), createPropRef(`p`, `author_id`)) ) - // Single-source clauses should be pushed to their respective subqueries + // Single-source clauses should be pushed to their respective subqueries and combined expect(optimized.from.type).toBe(`queryRef`) if (optimized.from.type === `queryRef`) { - expect(optimized.from.query.where).toHaveLength(2) // Original + new clause - expect(optimized.from.query.where).toContainEqual( - createGt(createPropRef(`u`, `age`), createValue(25)) - ) - expect(optimized.from.query.where).toContainEqual( - createEq(createPropRef(`u`, `department_id`), createValue(1)) - ) + // WHERE clauses are combined for performance + 
expect(optimized.from.query.where).toHaveLength(1) + expect(optimized.from.query.where![0]).toMatchObject({ + type: `func`, + name: `and`, + args: [ + createGt(createPropRef(`u`, `age`), createValue(25)), + createEq(createPropRef(`u`, `department_id`), createValue(1)), + ], + }) } expect(optimized.join).toHaveLength(1) @@ -902,13 +941,16 @@ describe(`Query Optimizer`, () => { const joinClause = optimized.join[0]! expect(joinClause.from.type).toBe(`queryRef`) if (joinClause.from.type === `queryRef`) { - expect(joinClause.from.query.where).toHaveLength(2) // Original + new clause - expect(joinClause.from.query.where).toContainEqual( - createGt(createPropRef(`p`, `views`), createValue(50)) - ) - expect(joinClause.from.query.where).toContainEqual( - createGt(createPropRef(`p`, `rating`), createValue(4)) - ) + // WHERE clauses are combined for performance + expect(joinClause.from.query.where).toHaveLength(1) + expect(joinClause.from.query.where![0]).toMatchObject({ + type: `func`, + name: `and`, + args: [ + createGt(createPropRef(`p`, `views`), createValue(50)), + createGt(createPropRef(`p`, `rating`), createValue(4)), + ], + }) } } }) @@ -1041,18 +1083,21 @@ describe(`Query Optimizer`, () => { const { optimizedQuery: optimized } = optimizeQuery(complexQuery) - // AND clause should be split and single-source parts pushed down + // AND clause should be split and single-source parts pushed down, then combined for performance expect(optimized.where).toEqual([]) expect(optimized.from.type).toBe(`queryRef`) if (optimized.from.type === `queryRef`) { - // Should contain the original condition plus the AND clause (which gets split) - expect(optimized.from.query.where).toContainEqual( + // WHERE clauses should be combined into a single AND expression + expect(optimized.from.query.where).toHaveLength(1) + expect(optimized.from.query.where![0]).toMatchObject({ + type: `func`, + name: `and`, + }) + // Verify it contains the original condition and the new conditions + const 
combinedWhere = optimized.from.query.where![0] as any + expect(combinedWhere.args).toContainEqual( createGt(createPropRef(`u`, `age`), createValue(18)) ) - - // Should have the AND clause pushed down (may be split into components) - const whereClausesLength = optimized.from.query.where?.length || 0 - expect(whereClausesLength).toBeGreaterThan(1) // Should have at least the original + new conditions } }) }) From 5cf78c5a9681dd0e36ce64edfda28efb772a1ba3 Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 30 Oct 2025 15:39:28 +0000 Subject: [PATCH 2/9] docs: Add changeset and investigation report for issue #445 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Added changeset for the WHERE clause optimization fix - Documented root cause analysis and solution details 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- .changeset/optimize-multiple-where-clauses.md | 5 + ISSUE-445-INVESTIGATION.md | 162 ++++++++++++++++++ 2 files changed, 167 insertions(+) create mode 100644 .changeset/optimize-multiple-where-clauses.md create mode 100644 ISSUE-445-INVESTIGATION.md diff --git a/.changeset/optimize-multiple-where-clauses.md b/.changeset/optimize-multiple-where-clauses.md new file mode 100644 index 000000000..5521be516 --- /dev/null +++ b/.changeset/optimize-multiple-where-clauses.md @@ -0,0 +1,5 @@ +--- +"@tanstack/db": patch +--- + +Fixed performance issue where using multiple `.where()` calls on queries without joins resulted in 40%+ slowdown. The optimizer now combines multiple WHERE clauses into a single AND expression, reducing the number of filter operators in the query pipeline from N to 1. This makes chaining `.where()` calls perform identically to using a single `.where()` with `and()`. 
diff --git a/ISSUE-445-INVESTIGATION.md b/ISSUE-445-INVESTIGATION.md new file mode 100644 index 000000000..dd3ebb235 --- /dev/null +++ b/ISSUE-445-INVESTIGATION.md @@ -0,0 +1,162 @@ +# Investigation: Issue #445 - useLiveQuery Performance Problem + +## Summary +Investigated and fixed a performance issue where using multiple `.where()` calls resulted in 40%+ slowdown compared to using a single WHERE clause with AND. + +## Root Cause Analysis + +### The Problem +When users write queries like this: +```javascript +useLiveQuery(q => + q.from({ item: orderCollection }) + .where(({ item }) => eq(item.gridId, gridId)) + .where(({ item }) => eq(item.rowId, rowId)) + .where(({ item }) => eq(item.side, side)) +) +``` + +The optimizer was completely skipping queries without joins, as seen in `optimizer.ts:333-337`: +```typescript +// Skip optimization if there are no joins - predicate pushdown only benefits joins +// Single-table queries don't benefit from this optimization +if (!query.join || query.join.length === 0) { + return query +} +``` + +This meant the three WHERE clauses remained as separate array elements. During query compilation (`compiler/index.ts:185-196`), each WHERE clause was applied as a **separate filter() operation** in the D2 pipeline: + +```typescript +if (query.where && query.where.length > 0) { + for (const where of query.where) { + const whereExpression = getWhereExpression(where) + const compiledWhere = compileExpression(whereExpression) + pipeline = pipeline.pipe( + filter(([_key, namespacedRow]) => { + return compiledWhere(namespacedRow) + }) + ) + } +} +``` + +This creates **three separate filter operators** in the pipeline instead of one, causing unnecessary overhead. 
+ +### Performance Impact +- Each filter operator adds overhead to the pipeline +- Data flows through multiple filter stages instead of a single combined evaluation +- This compounds when rendering many items simultaneously (as reported in the issue) +- Results in 40%+ performance degradation + +## The Solution + +Modified the optimizer to combine multiple WHERE clauses into a single AND expression for queries without joins: + +```typescript +// For queries without joins, combine multiple WHERE clauses into a single clause +// to avoid creating multiple filter operators in the pipeline +if (!query.join || query.join.length === 0) { + if (query.where.length > 1) { + // Combine multiple WHERE clauses into a single AND expression + const splitWhereClauses = splitAndClauses(query.where) + const combinedWhere = combineWithAnd(splitWhereClauses) + + return { + ...query, + where: [combinedWhere], + } + } + return query +} +``` + +### Benefits +1. **Single Pipeline Operator**: Only one filter() operation is added to the pipeline instead of N operations +2. **Consistent Performance**: Performance matches single WHERE with AND +3. **Semantically Equivalent**: Multiple WHERE clauses are still ANDed together, just more efficiently +4. **Applies Broadly**: Works for simple FROM queries as well as nested subqueries + +## Implementation Details + +### Files Changed +1. **`packages/db/src/query/optimizer.ts`**: Added WHERE clause combining logic for queries without joins +2. 
**`packages/db/tests/query/optimizer.test.ts`**: Updated tests to expect combined WHERE clauses + +### Testing +- All 42 optimizer tests pass +- Added new test case: "should combine multiple WHERE clauses for queries without joins" +- Updated 5 existing tests to reflect the new optimization behavior + +### Before vs After + +**Before (3 separate filters):** +``` +FROM collection +→ filter(gridId = x) +→ filter(rowId = y) +→ filter(side = z) +``` + +**After (1 combined filter):** +``` +FROM collection +→ filter(AND(gridId = x, rowId = y, side = z)) +``` + +## Impact on Other Query Types + +The optimization is **safe** and applies only to: +- Queries **without** joins +- Queries with **multiple** WHERE clauses (2 or more) +- Both direct collection references and subqueries + +It does **not** affect: +- Queries with joins (these already go through predicate pushdown optimization) +- Queries with a single WHERE clause (no need to combine) +- Functional WHERE clauses (`fn.where()`) + +## Next Steps + +### For the Issue Reporter +Please test the fix with your reproduction case. The performance should now match or exceed your Redux selectors. + +### For Maintainers +Consider whether this optimization should also apply to: +1. Functional WHERE clauses (`fn.where()`) +2. HAVING clauses (similar pattern exists) + +## Performance Verification + +To verify the fix, compare: + +```javascript +// Multiple WHERE calls (now optimized) +query.from({ item: collection }) + .where(({ item }) => eq(item.gridId, gridId)) + .where(({ item }) => eq(item.rowId, rowId)) + .where(({ item }) => eq(item.side, side)) + +// Single WHERE with AND (already fast) +query.from({ item: collection }) + .where(({ item }) => and( + eq(item.gridId, gridId), + eq(item.rowId, rowId), + eq(item.side, side) + )) +``` + +Both should now have identical performance characteristics. 
+ +## Related Code Locations + +- Query Optimizer: `packages/db/src/query/optimizer.ts` +- Query Compiler: `packages/db/src/query/compiler/index.ts` +- WHERE Evaluation: `packages/db/src/query/compiler/evaluators.ts` +- Optimizer Tests: `packages/db/tests/query/optimizer.test.ts` + +## References + +- Issue: https://github.com/TanStack/db/issues/445 +- Commit: e46fc81 +- Branch: claude/investigate-db-slowdown-011CUdbdVnfi28CckcUPfp5j From e449a9bcf40c67a6a5eecf78edff2ebe3b262fb0 Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 30 Oct 2025 15:54:14 +0000 Subject: [PATCH 3/9] fix: Complete optimizer fix - combine remaining WHERE clauses after pushdown MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This completes the fix for issue #445 by implementing the missing "step 3" of the optimizer process. ## Problem (Broader than Initially Identified) The optimizer was missing the final step of combining remaining WHERE clauses after optimization. This affected: 1. Queries WITHOUT joins: All optimization was skipped, leaving multiple WHERE clauses as separate array elements 2. Queries WITH joins: After predicate pushdown, remaining WHERE clauses (multi-source + unpushable single-source) were left as separate elements Both cases resulted in multiple filter() operations in the pipeline instead of a single combined filter, causing 40%+ performance degradation. ## Solution Implemented "step 3" (combine remaining WHERE clauses) in two places: 1. **applySingleLevelOptimization**: For queries without joins, combine multiple WHERE clauses before returning 2. 
**applyOptimizations**: After predicate pushdown for queries with joins, combine all remaining WHERE clauses (multi-source + unpushable) ## Testing - Added test: "should combine multiple remaining WHERE clauses after optimization" - All 43 optimizer tests pass - Updated investigation report with complete analysis - Updated changeset to reflect the complete fix Thanks to colleague feedback for catching that step 3 was missing! 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- .changeset/optimize-multiple-where-clauses.md | 6 +- ISSUE-445-INVESTIGATION.md | 87 ++++++++++++++----- packages/db/src/query/optimizer.ts | 11 ++- packages/db/tests/query/optimizer.test.ts | 58 +++++++++++++ 4 files changed, 138 insertions(+), 24 deletions(-) diff --git a/.changeset/optimize-multiple-where-clauses.md b/.changeset/optimize-multiple-where-clauses.md index 5521be516..364c54ed0 100644 --- a/.changeset/optimize-multiple-where-clauses.md +++ b/.changeset/optimize-multiple-where-clauses.md @@ -2,4 +2,8 @@ "@tanstack/db": patch --- -Fixed performance issue where using multiple `.where()` calls on queries without joins resulted in 40%+ slowdown. The optimizer now combines multiple WHERE clauses into a single AND expression, reducing the number of filter operators in the query pipeline from N to 1. This makes chaining `.where()` calls perform identically to using a single `.where()` with `and()`. +Fixed performance issue where using multiple `.where()` calls resulted in 40%+ slowdown by creating multiple filter operators in the query pipeline. The optimizer now implements the missing final step (step 3) of combining remaining WHERE clauses into a single AND expression. 
This applies to both queries with and without joins: +- Queries without joins: Multiple WHERE clauses are now combined before compilation +- Queries with joins: Remaining WHERE clauses after predicate pushdown are combined + +This reduces filter operators from N to 1, making chained `.where()` calls perform identically to using a single `.where()` with `and()`. diff --git a/ISSUE-445-INVESTIGATION.md b/ISSUE-445-INVESTIGATION.md index dd3ebb235..9e138c44e 100644 --- a/ISSUE-445-INVESTIGATION.md +++ b/ISSUE-445-INVESTIGATION.md @@ -1,11 +1,20 @@ # Investigation: Issue #445 - useLiveQuery Performance Problem ## Summary -Investigated and fixed a performance issue where using multiple `.where()` calls resulted in 40%+ slowdown compared to using a single WHERE clause with AND. +Investigated and fixed a performance issue where using multiple `.where()` calls resulted in 40%+ slowdown compared to using a single WHERE clause with AND. The root cause affected **both** queries with and without joins. ## Root Cause Analysis -### The Problem +### The Complete Problem + +The optimizer's intended process is: +1. **Split**: Split all WHERE clauses with "and" at top level into separate clauses +2. **Push down**: Push single-source clauses to subqueries (for queries with joins) +3. **Combine**: Combine all remaining WHERE clauses back into a single one with "and" + +**Step 3 was missing entirely**, causing multiple filter operations in the pipeline. 
+ +### Problem #1: Queries WITHOUT Joins When users write queries like this: ```javascript useLiveQuery(q => @@ -16,7 +25,7 @@ useLiveQuery(q => ) ``` -The optimizer was completely skipping queries without joins, as seen in `optimizer.ts:333-337`: +The optimizer was completely skipping queries without joins (`optimizer.ts:333-337`): ```typescript // Skip optimization if there are no joins - predicate pushdown only benefits joins // Single-table queries don't benefit from this optimization @@ -25,7 +34,28 @@ if (!query.join || query.join.length === 0) { } ``` -This meant the three WHERE clauses remained as separate array elements. During query compilation (`compiler/index.ts:185-196`), each WHERE clause was applied as a **separate filter() operation** in the D2 pipeline: +This meant ALL THREE STEPS were skipped, leaving WHERE clauses as separate array elements. + +### Problem #2: Queries WITH Joins (Broader Issue) +Even for queries WITH joins, **step 3 was missing**. After pushing down single-source clauses, any remaining WHERE clauses (multi-source + unpushable single-source) were left as separate array elements instead of being combined. + +Example scenario: +```javascript +q.from({ stats: subqueryWithGroupBy }) // Can't push WHERE into GROUP BY + .join({ posts: postsCollection }, ...) 
+ .where(({ stats }) => gt(stats.count, 5)) // Single-source but can't push down + .where(({ posts }) => gt(posts.views, 100)) // Single-source, can push down + .where(({ stats, posts }) => eq(stats.id, posts.author_id)) // Multi-source +``` + +After optimization: +- Posts clause: pushed down ✓ +- Stats clause: can't push down (GROUP BY safety check) +- Multi-source clause: must stay in main query +- **Result**: 2 separate WHERE clauses remaining → 2 filter operators ✗ + +### The Pipeline Impact +During query compilation (`compiler/index.ts:185-196`), each WHERE clause creates a **separate filter() operation**: ```typescript if (query.where && query.where.length > 0) { @@ -41,27 +71,24 @@ if (query.where && query.where.length > 0) { } ``` -This creates **three separate filter operators** in the pipeline instead of one, causing unnecessary overhead. - ### Performance Impact - Each filter operator adds overhead to the pipeline -- Data flows through multiple filter stages instead of a single combined evaluation -- This compounds when rendering many items simultaneously (as reported in the issue) +- Data flows through N filter stages instead of 1 combined evaluation +- This compounds when rendering many items simultaneously - Results in 40%+ performance degradation ## The Solution -Modified the optimizer to combine multiple WHERE clauses into a single AND expression for queries without joins: +Implemented **step 3** for all query types: + +### Fix #1: Queries WITHOUT Joins (in `applySingleLevelOptimization`) ```typescript // For queries without joins, combine multiple WHERE clauses into a single clause -// to avoid creating multiple filter operators in the pipeline if (!query.join || query.join.length === 0) { if (query.where.length > 1) { - // Combine multiple WHERE clauses into a single AND expression const splitWhereClauses = splitAndClauses(query.where) const combinedWhere = combineWithAnd(splitWhereClauses) - return { ...query, where: [combinedWhere], @@ -71,22 
+98,40 @@ if (!query.join || query.join.length === 0) { } ``` +### Fix #2: Queries WITH Joins (in `applyOptimizations`) +After pushing down single-source clauses, combine all remaining WHERE clauses: + +```typescript +// Combine multiple remaining WHERE clauses into a single clause to avoid +// multiple filter operations in the pipeline (performance optimization) +const finalWhere: Array<Where> = + remainingWhereClauses.length > 1 + ? [combineWithAnd(remainingWhereClauses.map(getWhereExpression))] + : remainingWhereClauses +``` + ### Benefits -1. **Single Pipeline Operator**: Only one filter() operation is added to the pipeline instead of N operations -2. **Consistent Performance**: Performance matches single WHERE with AND -3. **Semantically Equivalent**: Multiple WHERE clauses are still ANDed together, just more efficiently -4. **Applies Broadly**: Works for simple FROM queries as well as nested subqueries +1. **Single Pipeline Operator**: Only one filter() operation regardless of how many WHERE clauses remain +2. **Consistent Performance**: Matches the performance of writing WHERE clauses manually with AND +3. **Semantically Equivalent**: Multiple WHERE clauses are still ANDed together +4. **Applies Universally**: Works for all query types (with/without joins, simple/complex) +5. **Preserves Optimizations**: Still does predicate pushdown for queries with joins ## Implementation Details ### Files Changed -1. **`packages/db/src/query/optimizer.ts`**: Added WHERE clause combining logic for queries without joins -2. **`packages/db/tests/query/optimizer.test.ts`**: Updated tests to expect combined WHERE clauses +1. **`packages/db/src/query/optimizer.ts`**: + - Added WHERE combining for queries without joins (line 333-350) + - Added WHERE combining after predicate pushdown for queries with joins (line 690-695) +2.
**`packages/db/tests/query/optimizer.test.ts`**: + - Added test: "should combine multiple WHERE clauses for queries without joins" + - Added test: "should combine multiple remaining WHERE clauses after optimization" + - Updated 5 existing tests to expect combined WHERE clauses ### Testing -- All 42 optimizer tests pass -- Added new test case: "should combine multiple WHERE clauses for queries without joins" -- Updated 5 existing tests to reflect the new optimization behavior +- All 43 optimizer tests pass +- New test confirms remaining WHERE clauses are combined after optimization +- Updated tests reflect the new optimization behavior ### Before vs After diff --git a/packages/db/src/query/optimizer.ts b/packages/db/src/query/optimizer.ts index e10e13a7a..8988b0e15 100644 --- a/packages/db/src/query/optimizer.ts +++ b/packages/db/src/query/optimizer.ts @@ -687,6 +687,13 @@ function applyOptimizations( // If optimized and no outer JOINs - don't keep (original behavior) } + // Combine multiple remaining WHERE clauses into a single clause to avoid + // multiple filter operations in the pipeline (performance optimization) + const finalWhere: Array<Where> = + remainingWhereClauses.length > 1 + ? [combineWithAnd(remainingWhereClauses.map(getWhereExpression))] + : remainingWhereClauses + // Create a completely new query object to ensure immutability const optimizedQuery: QueryIR = { // Copy all non-optimized fields as-is @@ -705,8 +712,8 @@ function applyOptimizations( from: optimizedFrom, join: optimizedJoins, - // Only include WHERE clauses that weren't successfully optimized - where: remainingWhereClauses.length > 0 ? remainingWhereClauses : [], + // Include combined WHERE clauses + where: finalWhere.length > 0 ?
finalWhere : [], } return optimizedQuery diff --git a/packages/db/tests/query/optimizer.test.ts b/packages/db/tests/query/optimizer.test.ts index a9eec06dc..c11eb90fc 100644 --- a/packages/db/tests/query/optimizer.test.ts +++ b/packages/db/tests/query/optimizer.test.ts @@ -1464,6 +1464,64 @@ describe(`Query Optimizer`, () => { ) } }) + + test(`should combine multiple remaining WHERE clauses after optimization`, () => { + // This test verifies that if multiple WHERE clauses remain after optimization + // (e.g., because some can't be pushed down), they are combined into a single clause + const subqueryWithAggregates: QueryIR = { + from: new CollectionRef(mockCollection, `u`), + select: { + department_id: createPropRef(`u`, `department_id`), + user_count: createAgg(`count`, createPropRef(`u`, `id`)), + }, + groupBy: [createPropRef(`u`, `department_id`)], + } + + const query: QueryIR = { + from: new QueryRef(subqueryWithAggregates, `stats`), + join: [ + { + from: new CollectionRef(mockCollection, `p`), + type: `inner`, + left: createPropRef(`stats`, `department_id`), + right: createPropRef(`p`, `department_id`), + }, + ], + where: [ + createGt(createPropRef(`stats`, `user_count`), createValue(5)), // Can't push down - GROUP BY + createGt(createPropRef(`p`, `views`), createValue(100)), // Can push down + createEq( + createPropRef(`stats`, `department_id`), + createPropRef(`p`, `author_dept`) + ), // Multi-source + ], + } + + const { optimizedQuery: optimized } = optimizeQuery(query) + + // The posts clause should be pushed down + expect(optimized.join).toHaveLength(1) + if (optimized.join && optimized.join[0]) { + expect(optimized.join[0].from.type).toBe(`queryRef`) + if (optimized.join[0].from.type === `queryRef`) { + expect(optimized.join[0].from.query.where).toHaveLength(1) + } + } + + // The stats clause and multi-source clause should remain BUT be combined into ONE + console.log( + `Remaining WHERE clauses: ${optimized.where?.length || 0}`, + 
JSON.stringify(optimized.where, null, 2) + ) + expect(optimized.where).toBeDefined() + // This is the KEY assertion - all remaining clauses should be combined + // Currently this might FAIL if step 3 is missing + expect(optimized.where!.length).toBe(1) + expect(optimized.where![0]).toMatchObject({ + type: `func`, + name: `and`, + }) + }) }) describe(`JOIN semantics preservation`, () => { From 88bc1df0fbd7edd63b7d96a1601af6676cc87780 Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 30 Oct 2025 16:03:39 +0000 Subject: [PATCH 4/9] style: Run prettier on markdown files MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- .changeset/optimize-multiple-where-clauses.md | 1 + ISSUE-445-INVESTIGATION.md | 40 ++++++++++++++----- 2 files changed, 32 insertions(+), 9 deletions(-) diff --git a/.changeset/optimize-multiple-where-clauses.md b/.changeset/optimize-multiple-where-clauses.md index 364c54ed0..9d2f35cd4 100644 --- a/.changeset/optimize-multiple-where-clauses.md +++ b/.changeset/optimize-multiple-where-clauses.md @@ -3,6 +3,7 @@ --- Fixed performance issue where using multiple `.where()` calls resulted in 40%+ slowdown by creating multiple filter operators in the query pipeline. The optimizer now implements the missing final step (step 3) of combining remaining WHERE clauses into a single AND expression. 
This applies to both queries with and without joins: + - Queries without joins: Multiple WHERE clauses are now combined before compilation - Queries with joins: Remaining WHERE clauses after predicate pushdown are combined diff --git a/ISSUE-445-INVESTIGATION.md b/ISSUE-445-INVESTIGATION.md index 9e138c44e..c8eb2e95b 100644 --- a/ISSUE-445-INVESTIGATION.md +++ b/ISSUE-445-INVESTIGATION.md @@ -1,6 +1,7 @@ # Investigation: Issue #445 - useLiveQuery Performance Problem ## Summary + Investigated and fixed a performance issue where using multiple `.where()` calls resulted in 40%+ slowdown compared to using a single WHERE clause with AND. The root cause affected **both** queries with and without joins. ## Root Cause Analysis @@ -8,6 +9,7 @@ Investigated and fixed a performance issue where using multiple `.where()` calls ### The Complete Problem The optimizer's intended process is: + 1. **Split**: Split all WHERE clauses with "and" at top level into separate clauses 2. **Push down**: Push single-source clauses to subqueries (for queries with joins) 3. **Combine**: Combine all remaining WHERE clauses back into a single one with "and" @@ -15,10 +17,13 @@ The optimizer's intended process is: **Step 3 was missing entirely**, causing multiple filter operations in the pipeline. 
### Problem #1: Queries WITHOUT Joins + When users write queries like this: + ```javascript -useLiveQuery(q => - q.from({ item: orderCollection }) +useLiveQuery((q) => + q + .from({ item: orderCollection }) .where(({ item }) => eq(item.gridId, gridId)) .where(({ item }) => eq(item.rowId, rowId)) .where(({ item }) => eq(item.side, side)) @@ -26,6 +31,7 @@ useLiveQuery(q => ``` The optimizer was completely skipping queries without joins (`optimizer.ts:333-337`): + ```typescript // Skip optimization if there are no joins - predicate pushdown only benefits joins // Single-table queries don't benefit from this optimization @@ -37,9 +43,11 @@ if (!query.join || query.join.length === 0) { This meant ALL THREE STEPS were skipped, leaving WHERE clauses as separate array elements. ### Problem #2: Queries WITH Joins (Broader Issue) + Even for queries WITH joins, **step 3 was missing**. After pushing down single-source clauses, any remaining WHERE clauses (multi-source + unpushable single-source) were left as separate array elements instead of being combined. Example scenario: + ```javascript q.from({ stats: subqueryWithGroupBy }) // Can't push WHERE into GROUP BY .join({ posts: postsCollection }, ...) 
@@ -49,12 +57,14 @@ q.from({ stats: subqueryWithGroupBy }) // Can't push WHERE into GROUP BY ``` After optimization: + - Posts clause: pushed down ✓ - Stats clause: can't push down (GROUP BY safety check) - Multi-source clause: must stay in main query - **Result**: 2 separate WHERE clauses remaining → 2 filter operators ✗ ### The Pipeline Impact + During query compilation (`compiler/index.ts:185-196`), each WHERE clause creates a **separate filter() operation**: ```typescript @@ -72,6 +82,7 @@ if (query.where && query.where.length > 0) { ``` ### Performance Impact + - Each filter operator adds overhead to the pipeline - Data flows through N filter stages instead of 1 combined evaluation - This compounds when rendering many items simultaneously @@ -99,6 +110,7 @@ if (!query.join || query.join.length === 0) { ``` ### Fix #2: Queries WITH Joins (in `applyOptimizations`) + After pushing down single-source clauses, combine all remaining WHERE clauses: ```typescript @@ -111,6 +123,7 @@ const finalWhere: Array = ``` ### Benefits + 1. **Single Pipeline Operator**: Only one filter() operation regardless of how many WHERE clauses remain 2. **Consistent Performance**: Matches the performance of writing WHERE clauses manually with AND 3. **Semantically Equivalent**: Multiple WHERE clauses are still ANDed together @@ -120,6 +133,7 @@ const finalWhere: Array = ## Implementation Details ### Files Changed + 1. 
**`packages/db/src/query/optimizer.ts`**: - Added WHERE combining for queries without joins (line 333-350) - Added WHERE combining after predicate pushdown for queries with joins (line 690-695) @@ -129,6 +143,7 @@ const finalWhere: Array = - Updated 5 existing tests to expect combined WHERE clauses ### Testing + - All 43 optimizer tests pass - New test confirms remaining WHERE clauses are combined after optimization - Updated tests reflect the new optimization behavior @@ -136,6 +151,7 @@ const finalWhere: Array = ### Before vs After **Before (3 separate filters):** + ``` FROM collection → filter(gridId = x) @@ -144,6 +160,7 @@ FROM collection ``` **After (1 combined filter):** + ``` FROM collection → filter(AND(gridId = x, rowId = y, side = z)) @@ -152,11 +169,13 @@ FROM collection ## Impact on Other Query Types The optimization is **safe** and applies only to: + - Queries **without** joins - Queries with **multiple** WHERE clauses (2 or more) - Both direct collection references and subqueries It does **not** affect: + - Queries with joins (these already go through predicate pushdown optimization) - Queries with a single WHERE clause (no need to combine) - Functional WHERE clauses (`fn.where()`) @@ -164,10 +183,13 @@ It does **not** affect: ## Next Steps ### For the Issue Reporter + Please test the fix with your reproduction case. The performance should now match or exceed your Redux selectors. ### For Maintainers + Consider whether this optimization should also apply to: + 1. Functional WHERE clauses (`fn.where()`) 2. 
HAVING clauses (similar pattern exists) @@ -177,18 +199,18 @@ To verify the fix, compare: ```javascript // Multiple WHERE calls (now optimized) -query.from({ item: collection }) +query + .from({ item: collection }) .where(({ item }) => eq(item.gridId, gridId)) .where(({ item }) => eq(item.rowId, rowId)) .where(({ item }) => eq(item.side, side)) // Single WHERE with AND (already fast) -query.from({ item: collection }) - .where(({ item }) => and( - eq(item.gridId, gridId), - eq(item.rowId, rowId), - eq(item.side, side) - )) +query + .from({ item: collection }) + .where(({ item }) => + and(eq(item.gridId, gridId), eq(item.rowId, rowId), eq(item.side, side)) + ) ``` Both should now have identical performance characteristics. From 75301989823e9dfafe851e8f6c8c72f4118984d0 Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 30 Oct 2025 16:04:50 +0000 Subject: [PATCH 5/9] docs: Add PR body update for issue #445 fix MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- PR-BODY-UPDATE.md | 112 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 112 insertions(+) create mode 100644 PR-BODY-UPDATE.md diff --git a/PR-BODY-UPDATE.md b/PR-BODY-UPDATE.md new file mode 100644 index 000000000..a2e57f33f --- /dev/null +++ b/PR-BODY-UPDATE.md @@ -0,0 +1,112 @@ +# Fix: Optimizer Missing Final Step - Combine Remaining WHERE Clauses + +## Overview +Fixes issue #445 - 40%+ performance slowdown when using multiple `.where()` calls. The root cause was broader than initially identified: **the optimizer was missing "step 3"** (combining remaining WHERE clauses), affecting both queries with and without joins. + +## Problem Analysis + +### The Optimizer's Intended Process +1. **Split**: Split WHERE clauses with AND at the root level into separate clauses +2. **Push down**: Push single-source clauses to subqueries (for queries with joins) +3. 
**Combine**: Combine all remaining WHERE clauses back into a single AND expression + +**Step 3 was completely missing**, causing multiple `filter()` operations in the query pipeline. + +### Two Types of Affected Queries + +#### 1. Queries WITHOUT Joins (Reported in Issue #445) +```javascript +useLiveQuery(q => + q.from({ item: orderCollection }) + .where(({ item }) => eq(item.gridId, gridId)) + .where(({ item }) => eq(item.rowId, rowId)) + .where(({ item }) => eq(item.side, side)) +) +``` + +The optimizer was skipping these entirely, leaving **3 separate WHERE clauses** → **3 filter operators** in the pipeline. + +#### 2. Queries WITH Joins (Broader Issue) +```javascript +q.from({ stats: subqueryWithGroupBy }) // Can't push WHERE into GROUP BY + .join({ posts: postsCollection }, ...) + .where(({ stats }) => gt(stats.count, 5)) // Can't push down (safety check) + .where(({ posts }) => gt(posts.views, 100)) // Can push down ✓ + .where(({ stats, posts }) => eq(stats.id, posts.author_id)) // Multi-source +``` + +After predicate pushdown: +- Posts clause: pushed down ✓ +- Stats clause + multi-source clause: **2 separate WHERE clauses remain** → **2 filter operators** ✗ + +### Performance Impact +Each filter operator adds overhead. Data flows through N filter stages instead of 1 combined evaluation, causing 40%+ slowdown when rendering many items. + +## Solution + +Implemented "step 3" in two places: + +### Fix #1: `applySingleLevelOptimization` (queries without joins) +```typescript +if (!query.join || query.join.length === 0) { + if (query.where.length > 1) { + const splitWhereClauses = splitAndClauses(query.where) + const combinedWhere = combineWithAnd(splitWhereClauses) + return { ...query, where: [combinedWhere] } + } + return query +} +``` + +### Fix #2: `applyOptimizations` (queries with joins) +```typescript +// After predicate pushdown, combine remaining WHERE clauses +const finalWhere: Array = + remainingWhereClauses.length > 1 + ? 
[combineWithAnd(remainingWhereClauses.map(getWhereExpression))] + : remainingWhereClauses +``` + +## Testing +- ✅ All 43 optimizer tests pass +- ✅ Added test: "should combine multiple WHERE clauses for queries without joins" +- ✅ Added test: "should combine multiple remaining WHERE clauses after optimization" +- ✅ Updated 5 existing tests to expect combined WHERE clauses + +## Before vs After + +**Before (Multiple filter operators):** +``` +FROM collection +→ filter(gridId = x) +→ filter(rowId = y) +→ filter(side = z) +``` + +**After (Single combined filter):** +``` +FROM collection +→ filter(AND(gridId = x, rowId = y, side = z)) +``` + +## Benefits +1. **Single Pipeline Operator**: Only 1 filter operation regardless of how many WHERE clauses +2. **Consistent Performance**: Chaining `.where()` now performs identically to using `.where(and(...))` +3. **Semantically Equivalent**: Multiple WHERE clauses still ANDed together +4. **Universal Application**: Works for all query types (with/without joins, simple/complex) +5. **Preserves Optimizations**: Still performs predicate pushdown for queries with joins + +## Files Changed +- `packages/db/src/query/optimizer.ts` - Added WHERE combining logic (2 locations) +- `packages/db/tests/query/optimizer.test.ts` - Added tests and updated existing ones +- `.changeset/optimize-multiple-where-clauses.md` - Changeset describing the fix +- `ISSUE-445-INVESTIGATION.md` - Detailed investigation report + +## Credits +Thanks to colleague feedback for catching that step 3 was missing from the optimizer! 
+ +--- + +🤖 Generated with [Claude Code](https://claude.com/claude-code) + +Co-Authored-By: Claude From 7ef86358dadbdd2cdce157367a5f3d4ebdcc455a Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 30 Oct 2025 16:08:53 +0000 Subject: [PATCH 6/9] docs: Remove specific 40% performance claim MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The original issue compared TanStack db with Redux, not the bug itself. Changed to more general language about performance degradation. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- .changeset/optimize-multiple-where-clauses.md | 2 +- ISSUE-445-INVESTIGATION.md | 4 ++-- PR-BODY-UPDATE.md | 23 +++++++++++++++---- 3 files changed, 22 insertions(+), 7 deletions(-) diff --git a/.changeset/optimize-multiple-where-clauses.md b/.changeset/optimize-multiple-where-clauses.md index 9d2f35cd4..6f2d55850 100644 --- a/.changeset/optimize-multiple-where-clauses.md +++ b/.changeset/optimize-multiple-where-clauses.md @@ -2,7 +2,7 @@ "@tanstack/db": patch --- -Fixed performance issue where using multiple `.where()` calls resulted in 40%+ slowdown by creating multiple filter operators in the query pipeline. The optimizer now implements the missing final step (step 3) of combining remaining WHERE clauses into a single AND expression. This applies to both queries with and without joins: +Fixed performance issue where using multiple `.where()` calls created multiple filter operators in the query pipeline. The optimizer now implements the missing final step (step 3) of combining remaining WHERE clauses into a single AND expression. 
This applies to both queries with and without joins: - Queries without joins: Multiple WHERE clauses are now combined before compilation - Queries with joins: Remaining WHERE clauses after predicate pushdown are combined diff --git a/ISSUE-445-INVESTIGATION.md b/ISSUE-445-INVESTIGATION.md index c8eb2e95b..69e5fe374 100644 --- a/ISSUE-445-INVESTIGATION.md +++ b/ISSUE-445-INVESTIGATION.md @@ -2,7 +2,7 @@ ## Summary -Investigated and fixed a performance issue where using multiple `.where()` calls resulted in 40%+ slowdown compared to using a single WHERE clause with AND. The root cause affected **both** queries with and without joins. +Investigated and fixed a performance issue where using multiple `.where()` calls resulted in slower performance compared to using a single WHERE clause with AND. The root cause affected **both** queries with and without joins. ## Root Cause Analysis @@ -86,7 +86,7 @@ if (query.where && query.where.length > 0) { - Each filter operator adds overhead to the pipeline - Data flows through N filter stages instead of 1 combined evaluation - This compounds when rendering many items simultaneously -- Results in 40%+ performance degradation +- Results in unnecessary performance degradation ## The Solution diff --git a/PR-BODY-UPDATE.md b/PR-BODY-UPDATE.md index a2e57f33f..cc9d7f553 100644 --- a/PR-BODY-UPDATE.md +++ b/PR-BODY-UPDATE.md @@ -1,11 +1,13 @@ # Fix: Optimizer Missing Final Step - Combine Remaining WHERE Clauses ## Overview -Fixes issue #445 - 40%+ performance slowdown when using multiple `.where()` calls. The root cause was broader than initially identified: **the optimizer was missing "step 3"** (combining remaining WHERE clauses), affecting both queries with and without joins. + +Fixes issue #445 - performance issue when using multiple `.where()` calls. The root cause was broader than initially identified: **the optimizer was missing "step 3"** (combining remaining WHERE clauses), affecting both queries with and without joins. 
## Problem Analysis ### The Optimizer's Intended Process + 1. **Split**: Split WHERE clauses with AND at the root level into separate clauses 2. **Push down**: Push single-source clauses to subqueries (for queries with joins) 3. **Combine**: Combine all remaining WHERE clauses back into a single AND expression @@ -15,9 +17,11 @@ Fixes issue #445 - 40%+ performance slowdown when using multiple `.where()` call ### Two Types of Affected Queries #### 1. Queries WITHOUT Joins (Reported in Issue #445) + ```javascript -useLiveQuery(q => - q.from({ item: orderCollection }) +useLiveQuery((q) => + q + .from({ item: orderCollection }) .where(({ item }) => eq(item.gridId, gridId)) .where(({ item }) => eq(item.rowId, rowId)) .where(({ item }) => eq(item.side, side)) @@ -27,6 +31,7 @@ useLiveQuery(q => The optimizer was skipping these entirely, leaving **3 separate WHERE clauses** → **3 filter operators** in the pipeline. #### 2. Queries WITH Joins (Broader Issue) + ```javascript q.from({ stats: subqueryWithGroupBy }) // Can't push WHERE into GROUP BY .join({ posts: postsCollection }, ...) @@ -36,17 +41,20 @@ q.from({ stats: subqueryWithGroupBy }) // Can't push WHERE into GROUP BY ``` After predicate pushdown: + - Posts clause: pushed down ✓ - Stats clause + multi-source clause: **2 separate WHERE clauses remain** → **2 filter operators** ✗ ### Performance Impact -Each filter operator adds overhead. Data flows through N filter stages instead of 1 combined evaluation, causing 40%+ slowdown when rendering many items. + +Each filter operator adds overhead. Data flows through N filter stages instead of 1 combined evaluation, causing unnecessary performance degradation especially when rendering many items. 
## Solution Implemented "step 3" in two places: ### Fix #1: `applySingleLevelOptimization` (queries without joins) + ```typescript if (!query.join || query.join.length === 0) { if (query.where.length > 1) { @@ -59,6 +67,7 @@ if (!query.join || query.join.length === 0) { ``` ### Fix #2: `applyOptimizations` (queries with joins) + ```typescript // After predicate pushdown, combine remaining WHERE clauses const finalWhere: Array = @@ -68,6 +77,7 @@ const finalWhere: Array = ``` ## Testing + - ✅ All 43 optimizer tests pass - ✅ Added test: "should combine multiple WHERE clauses for queries without joins" - ✅ Added test: "should combine multiple remaining WHERE clauses after optimization" @@ -76,6 +86,7 @@ const finalWhere: Array = ## Before vs After **Before (Multiple filter operators):** + ``` FROM collection → filter(gridId = x) @@ -84,12 +95,14 @@ FROM collection ``` **After (Single combined filter):** + ``` FROM collection → filter(AND(gridId = x, rowId = y, side = z)) ``` ## Benefits + 1. **Single Pipeline Operator**: Only 1 filter operation regardless of how many WHERE clauses 2. **Consistent Performance**: Chaining `.where()` now performs identically to using `.where(and(...))` 3. **Semantically Equivalent**: Multiple WHERE clauses still ANDed together @@ -97,12 +110,14 @@ FROM collection 5. **Preserves Optimizations**: Still performs predicate pushdown for queries with joins ## Files Changed + - `packages/db/src/query/optimizer.ts` - Added WHERE combining logic (2 locations) - `packages/db/tests/query/optimizer.test.ts` - Added tests and updated existing ones - `.changeset/optimize-multiple-where-clauses.md` - Changeset describing the fix - `ISSUE-445-INVESTIGATION.md` - Detailed investigation report ## Credits + Thanks to colleague feedback for catching that step 3 was missing from the optimizer! 
--- From 962d128a65be51261ff8f54b89dbb9f6ae243a4e Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 30 Oct 2025 16:16:26 +0000 Subject: [PATCH 7/9] docs: Remove temporary investigation and PR body files MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These were used for context during development but aren't needed in the repo. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- ISSUE-445-INVESTIGATION.md | 229 ------------------------------------- PR-BODY-UPDATE.md | 127 -------------------- 2 files changed, 356 deletions(-) delete mode 100644 ISSUE-445-INVESTIGATION.md delete mode 100644 PR-BODY-UPDATE.md diff --git a/ISSUE-445-INVESTIGATION.md b/ISSUE-445-INVESTIGATION.md deleted file mode 100644 index 69e5fe374..000000000 --- a/ISSUE-445-INVESTIGATION.md +++ /dev/null @@ -1,229 +0,0 @@ -# Investigation: Issue #445 - useLiveQuery Performance Problem - -## Summary - -Investigated and fixed a performance issue where using multiple `.where()` calls resulted in slower performance compared to using a single WHERE clause with AND. The root cause affected **both** queries with and without joins. - -## Root Cause Analysis - -### The Complete Problem - -The optimizer's intended process is: - -1. **Split**: Split all WHERE clauses with "and" at top level into separate clauses -2. **Push down**: Push single-source clauses to subqueries (for queries with joins) -3. **Combine**: Combine all remaining WHERE clauses back into a single one with "and" - -**Step 3 was missing entirely**, causing multiple filter operations in the pipeline. 
- -### Problem #1: Queries WITHOUT Joins - -When users write queries like this: - -```javascript -useLiveQuery((q) => - q - .from({ item: orderCollection }) - .where(({ item }) => eq(item.gridId, gridId)) - .where(({ item }) => eq(item.rowId, rowId)) - .where(({ item }) => eq(item.side, side)) -) -``` - -The optimizer was completely skipping queries without joins (`optimizer.ts:333-337`): - -```typescript -// Skip optimization if there are no joins - predicate pushdown only benefits joins -// Single-table queries don't benefit from this optimization -if (!query.join || query.join.length === 0) { - return query -} -``` - -This meant ALL THREE STEPS were skipped, leaving WHERE clauses as separate array elements. - -### Problem #2: Queries WITH Joins (Broader Issue) - -Even for queries WITH joins, **step 3 was missing**. After pushing down single-source clauses, any remaining WHERE clauses (multi-source + unpushable single-source) were left as separate array elements instead of being combined. - -Example scenario: - -```javascript -q.from({ stats: subqueryWithGroupBy }) // Can't push WHERE into GROUP BY - .join({ posts: postsCollection }, ...) 
- .where(({ stats }) => gt(stats.count, 5)) // Single-source but can't push down - .where(({ posts }) => gt(posts.views, 100)) // Single-source, can push down - .where(({ stats, posts }) => eq(stats.id, posts.author_id)) // Multi-source -``` - -After optimization: - -- Posts clause: pushed down ✓ -- Stats clause: can't push down (GROUP BY safety check) -- Multi-source clause: must stay in main query -- **Result**: 2 separate WHERE clauses remaining → 2 filter operators ✗ - -### The Pipeline Impact - -During query compilation (`compiler/index.ts:185-196`), each WHERE clause creates a **separate filter() operation**: - -```typescript -if (query.where && query.where.length > 0) { - for (const where of query.where) { - const whereExpression = getWhereExpression(where) - const compiledWhere = compileExpression(whereExpression) - pipeline = pipeline.pipe( - filter(([_key, namespacedRow]) => { - return compiledWhere(namespacedRow) - }) - ) - } -} -``` - -### Performance Impact - -- Each filter operator adds overhead to the pipeline -- Data flows through N filter stages instead of 1 combined evaluation -- This compounds when rendering many items simultaneously -- Results in unnecessary performance degradation - -## The Solution - -Implemented **step 3** for all query types: - -### Fix #1: Queries WITHOUT Joins (in `applySingleLevelOptimization`) - -```typescript -// For queries without joins, combine multiple WHERE clauses into a single clause -if (!query.join || query.join.length === 0) { - if (query.where.length > 1) { - const splitWhereClauses = splitAndClauses(query.where) - const combinedWhere = combineWithAnd(splitWhereClauses) - return { - ...query, - where: [combinedWhere], - } - } - return query -} -``` - -### Fix #2: Queries WITH Joins (in `applyOptimizations`) - -After pushing down single-source clauses, combine all remaining WHERE clauses: - -```typescript -// Combine multiple remaining WHERE clauses into a single clause to avoid -// multiple filter operations 
in the pipeline (performance optimization) -const finalWhere: Array = - remainingWhereClauses.length > 1 - ? [combineWithAnd(remainingWhereClauses.map(getWhereExpression))] - : remainingWhereClauses -``` - -### Benefits - -1. **Single Pipeline Operator**: Only one filter() operation regardless of how many WHERE clauses remain -2. **Consistent Performance**: Matches the performance of writing WHERE clauses manually with AND -3. **Semantically Equivalent**: Multiple WHERE clauses are still ANDed together -4. **Applies Universally**: Works for all query types (with/without joins, simple/complex) -5. **Preserves Optimizations**: Still does predicate pushdown for queries with joins - -## Implementation Details - -### Files Changed - -1. **`packages/db/src/query/optimizer.ts`**: - - Added WHERE combining for queries without joins (line 333-350) - - Added WHERE combining after predicate pushdown for queries with joins (line 690-695) -2. **`packages/db/tests/query/optimizer.test.ts`**: - - Added test: "should combine multiple WHERE clauses for queries without joins" - - Added test: "should combine multiple remaining WHERE clauses after optimization" - - Updated 5 existing tests to expect combined WHERE clauses - -### Testing - -- All 43 optimizer tests pass -- New test confirms remaining WHERE clauses are combined after optimization -- Updated tests reflect the new optimization behavior - -### Before vs After - -**Before (3 separate filters):** - -``` -FROM collection -→ filter(gridId = x) -→ filter(rowId = y) -→ filter(side = z) -``` - -**After (1 combined filter):** - -``` -FROM collection -→ filter(AND(gridId = x, rowId = y, side = z)) -``` - -## Impact on Other Query Types - -The optimization is **safe** and applies only to: - -- Queries **without** joins -- Queries with **multiple** WHERE clauses (2 or more) -- Both direct collection references and subqueries - -It does **not** affect: - -- Queries with joins (these already go through predicate pushdown optimization) 
-- Queries with a single WHERE clause (no need to combine) -- Functional WHERE clauses (`fn.where()`) - -## Next Steps - -### For the Issue Reporter - -Please test the fix with your reproduction case. The performance should now match or exceed your Redux selectors. - -### For Maintainers - -Consider whether this optimization should also apply to: - -1. Functional WHERE clauses (`fn.where()`) -2. HAVING clauses (similar pattern exists) - -## Performance Verification - -To verify the fix, compare: - -```javascript -// Multiple WHERE calls (now optimized) -query - .from({ item: collection }) - .where(({ item }) => eq(item.gridId, gridId)) - .where(({ item }) => eq(item.rowId, rowId)) - .where(({ item }) => eq(item.side, side)) - -// Single WHERE with AND (already fast) -query - .from({ item: collection }) - .where(({ item }) => - and(eq(item.gridId, gridId), eq(item.rowId, rowId), eq(item.side, side)) - ) -``` - -Both should now have identical performance characteristics. - -## Related Code Locations - -- Query Optimizer: `packages/db/src/query/optimizer.ts` -- Query Compiler: `packages/db/src/query/compiler/index.ts` -- WHERE Evaluation: `packages/db/src/query/compiler/evaluators.ts` -- Optimizer Tests: `packages/db/tests/query/optimizer.test.ts` - -## References - -- Issue: https://github.com/TanStack/db/issues/445 -- Commit: e46fc81 -- Branch: claude/investigate-db-slowdown-011CUdbdVnfi28CckcUPfp5j diff --git a/PR-BODY-UPDATE.md b/PR-BODY-UPDATE.md deleted file mode 100644 index cc9d7f553..000000000 --- a/PR-BODY-UPDATE.md +++ /dev/null @@ -1,127 +0,0 @@ -# Fix: Optimizer Missing Final Step - Combine Remaining WHERE Clauses - -## Overview - -Fixes issue #445 - performance issue when using multiple `.where()` calls. The root cause was broader than initially identified: **the optimizer was missing "step 3"** (combining remaining WHERE clauses), affecting both queries with and without joins. - -## Problem Analysis - -### The Optimizer's Intended Process - -1. 
**Split**: Split WHERE clauses with AND at the root level into separate clauses -2. **Push down**: Push single-source clauses to subqueries (for queries with joins) -3. **Combine**: Combine all remaining WHERE clauses back into a single AND expression - -**Step 3 was completely missing**, causing multiple `filter()` operations in the query pipeline. - -### Two Types of Affected Queries - -#### 1. Queries WITHOUT Joins (Reported in Issue #445) - -```javascript -useLiveQuery((q) => - q - .from({ item: orderCollection }) - .where(({ item }) => eq(item.gridId, gridId)) - .where(({ item }) => eq(item.rowId, rowId)) - .where(({ item }) => eq(item.side, side)) -) -``` - -The optimizer was skipping these entirely, leaving **3 separate WHERE clauses** → **3 filter operators** in the pipeline. - -#### 2. Queries WITH Joins (Broader Issue) - -```javascript -q.from({ stats: subqueryWithGroupBy }) // Can't push WHERE into GROUP BY - .join({ posts: postsCollection }, ...) - .where(({ stats }) => gt(stats.count, 5)) // Can't push down (safety check) - .where(({ posts }) => gt(posts.views, 100)) // Can push down ✓ - .where(({ stats, posts }) => eq(stats.id, posts.author_id)) // Multi-source -``` - -After predicate pushdown: - -- Posts clause: pushed down ✓ -- Stats clause + multi-source clause: **2 separate WHERE clauses remain** → **2 filter operators** ✗ - -### Performance Impact - -Each filter operator adds overhead. Data flows through N filter stages instead of 1 combined evaluation, causing unnecessary performance degradation especially when rendering many items. 
- -## Solution - -Implemented "step 3" in two places: - -### Fix #1: `applySingleLevelOptimization` (queries without joins) - -```typescript -if (!query.join || query.join.length === 0) { - if (query.where.length > 1) { - const splitWhereClauses = splitAndClauses(query.where) - const combinedWhere = combineWithAnd(splitWhereClauses) - return { ...query, where: [combinedWhere] } - } - return query -} -``` - -### Fix #2: `applyOptimizations` (queries with joins) - -```typescript -// After predicate pushdown, combine remaining WHERE clauses -const finalWhere: Array = - remainingWhereClauses.length > 1 - ? [combineWithAnd(remainingWhereClauses.map(getWhereExpression))] - : remainingWhereClauses -``` - -## Testing - -- ✅ All 43 optimizer tests pass -- ✅ Added test: "should combine multiple WHERE clauses for queries without joins" -- ✅ Added test: "should combine multiple remaining WHERE clauses after optimization" -- ✅ Updated 5 existing tests to expect combined WHERE clauses - -## Before vs After - -**Before (Multiple filter operators):** - -``` -FROM collection -→ filter(gridId = x) -→ filter(rowId = y) -→ filter(side = z) -``` - -**After (Single combined filter):** - -``` -FROM collection -→ filter(AND(gridId = x, rowId = y, side = z)) -``` - -## Benefits - -1. **Single Pipeline Operator**: Only 1 filter operation regardless of how many WHERE clauses -2. **Consistent Performance**: Chaining `.where()` now performs identically to using `.where(and(...))` -3. **Semantically Equivalent**: Multiple WHERE clauses still ANDed together -4. **Universal Application**: Works for all query types (with/without joins, simple/complex) -5. 
**Preserves Optimizations**: Still performs predicate pushdown for queries with joins - -## Files Changed - -- `packages/db/src/query/optimizer.ts` - Added WHERE combining logic (2 locations) -- `packages/db/tests/query/optimizer.test.ts` - Added tests and updated existing ones -- `.changeset/optimize-multiple-where-clauses.md` - Changeset describing the fix -- `ISSUE-445-INVESTIGATION.md` - Detailed investigation report - -## Credits - -Thanks to colleague feedback for catching that step 3 was missing from the optimizer! - ---- - -🤖 Generated with [Claude Code](https://claude.com/claude-code) - -Co-Authored-By: Claude From db75cc723dddf92bc905476da49ae2daedcc4a6b Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 30 Oct 2025 16:29:12 +0000 Subject: [PATCH 8/9] fix: Flatten nested AND expressions when combining WHERE clauses MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses reviewer feedback - when combining remaining WHERE clauses after predicate pushdown, flatten any nested AND expressions to avoid creating and(and(...), ...) structures. Changes: - Use flatMap(splitAndClausesRecursive) before combineWithAnd to flatten - Added test for nested AND flattening - Added test verifying functional WHERE clauses remain separate All 45 optimizer tests pass. 
🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- packages/db/src/query/optimizer.ts | 9 ++- packages/db/tests/query/optimizer.test.ts | 90 +++++++++++++++++++++++ 2 files changed, 98 insertions(+), 1 deletion(-) diff --git a/packages/db/src/query/optimizer.ts b/packages/db/src/query/optimizer.ts index 8988b0e15..4a8816a69 100644 --- a/packages/db/src/query/optimizer.ts +++ b/packages/db/src/query/optimizer.ts @@ -689,9 +689,16 @@ function applyOptimizations( // Combine multiple remaining WHERE clauses into a single clause to avoid // multiple filter operations in the pipeline (performance optimization) + // First flatten any nested AND expressions to avoid and(and(...), ...) const finalWhere: Array = remainingWhereClauses.length > 1 - ? [combineWithAnd(remainingWhereClauses.map(getWhereExpression))] + ? [ + combineWithAnd( + remainingWhereClauses.flatMap((clause) => + splitAndClausesRecursive(getWhereExpression(clause)) + ) + ), + ] : remainingWhereClauses // Create a completely new query object to ensure immutability diff --git a/packages/db/tests/query/optimizer.test.ts b/packages/db/tests/query/optimizer.test.ts index c11eb90fc..3748be66d 100644 --- a/packages/db/tests/query/optimizer.test.ts +++ b/packages/db/tests/query/optimizer.test.ts @@ -1522,6 +1522,96 @@ describe(`Query Optimizer`, () => { name: `and`, }) }) + + test(`should flatten nested AND expressions when combining remaining clauses`, () => { + // This test verifies that if remaining WHERE clauses already contain AND expressions, + // they are flattened to avoid and(and(...), ...) 
nesting + const subqueryWithAggregates: QueryIR = { + from: new CollectionRef(mockCollection, `u`), + select: { + department_id: createPropRef(`u`, `department_id`), + user_count: createAgg(`count`, createPropRef(`u`, `id`)), + }, + groupBy: [createPropRef(`u`, `department_id`)], + } + + const query: QueryIR = { + from: new QueryRef(subqueryWithAggregates, `stats`), + join: [ + { + from: new CollectionRef(mockCollection, `p`), + type: `inner`, + left: createPropRef(`stats`, `department_id`), + right: createPropRef(`p`, `department_id`), + }, + ], + where: [ + // This is an AND expression that can't be pushed down + createAnd( + createGt(createPropRef(`stats`, `user_count`), createValue(5)), + createEq(createPropRef(`stats`, `department_id`), createValue(1)) + ), + createGt(createPropRef(`p`, `views`), createValue(100)), // Can push down + createEq( + createPropRef(`stats`, `department_id`), + createPropRef(`p`, `author_dept`) + ), // Multi-source + ], + } + + const { optimizedQuery: optimized } = optimizeQuery(query) + + // The posts clause should be pushed down + expect(optimized.join).toHaveLength(1) + if (optimized.join && optimized.join[0]) { + expect(optimized.join[0].from.type).toBe(`queryRef`) + } + + // The remaining clauses should be combined WITHOUT nested AND + expect(optimized.where).toBeDefined() + expect(optimized.where!.length).toBe(1) + const combinedWhere = optimized.where![0] as any + expect(combinedWhere.type).toBe(`func`) + expect(combinedWhere.name).toBe(`and`) + // Should have 3 args (the 2 from the nested AND + the multi-source clause), + // NOT 2 args where one is itself an AND + expect(combinedWhere.args).toHaveLength(3) + // Verify none of the args are AND expressions (i.e., fully flattened) + const argTypes = combinedWhere.args.map((arg: any) => ({ + type: arg.type, + name: arg.name, + })) + expect(argTypes).not.toContainEqual({ type: `func`, name: `and` }) + }) + + test(`should not combine functional WHERE clauses`, () => { + // Verify
that fn.where() clauses remain separate and are not combined + const query: QueryIR = { + from: new CollectionRef(mockCollection, `u`), + where: [ + createEq(createPropRef(`u`, `department_id`), createValue(1)), + createGt(createPropRef(`u`, `age`), createValue(25)), + ], + fnWhere: [ + (row: any) => row.u.name.startsWith(`A`), + (row: any) => row.u.email !== null, + ], + } + + const { optimizedQuery: optimized } = optimizeQuery(query) + + // Regular WHERE clauses should be combined into one + expect(optimized.where).toHaveLength(1) + expect(optimized.where![0]).toMatchObject({ + type: `func`, + name: `and`, + }) + + // Functional WHERE clauses should remain separate (not combined) + expect(optimized.fnWhere).toHaveLength(2) + expect(optimized.fnWhere![0]).toBeTypeOf(`function`) + expect(optimized.fnWhere![1]).toBeTypeOf(`function`) + }) }) describe(`JOIN semantics preservation`, () => { From aa93a368442822c489cda53f802d6a1a681783a9 Mon Sep 17 00:00:00 2001 From: Claude Date: Fri, 31 Oct 2025 12:40:57 +0000 Subject: [PATCH 9/9] style: Remove issue reference from code comment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As requested by @samwillis - issue references in code comments can become stale. The comment is self-explanatory without the reference. 
🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- packages/db/src/query/optimizer.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/db/src/query/optimizer.ts b/packages/db/src/query/optimizer.ts index 4a8816a69..71927da0d 100644 --- a/packages/db/src/query/optimizer.ts +++ b/packages/db/src/query/optimizer.ts @@ -331,7 +331,7 @@ function applySingleLevelOptimization(query: QueryIR): QueryIR { } // For queries without joins, combine multiple WHERE clauses into a single clause - // to avoid creating multiple filter operators in the pipeline (performance optimization for issue #445) + // to avoid creating multiple filter operators in the pipeline if (!query.join || query.join.length === 0) { // Only optimize if there are multiple WHERE clauses to combine if (query.where.length > 1) {