From 6173d89799a6ce0f82a0da49d5d1ce4ca8aa771b Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 10 Mar 2026 21:45:06 +0000 Subject: [PATCH 1/4] fix: track original predicates in DeduplicatedLoadSubset to prevent unbounded WHERE growth updateTracking() was called with the modified loadOptions (containing the minusWherePredicates difference expression) instead of the original request options. When a "load all" request (where=undefined) was made after accumulating eq() predicates, the difference expression NOT(IN([...])) was tracked instead of setting hasLoadedAllData=true. Each subsequent "load all" request compounded the expression: NOT(IN(...) OR NOT(IN(...))), producing deeply nested 193KB+ WHERE clauses that crashed Electric's parser. The fix separates loadOptions (sent to backend, may contain optimized difference query) from trackingOptions (used for tracking, preserves the original predicate). This ensures "load all" correctly sets hasLoadedAllData=true regardless of the optimization applied to the actual request. https://claude.ai/code/session_01Vp75RhjVR4tV5FdjKJbBWE --- packages/db/src/query/subset-dedupe.ts | 32 ++++--- packages/db/tests/query/subset-dedupe.test.ts | 86 +++++++++++++++++++ 2 files changed, 107 insertions(+), 11 deletions(-) diff --git a/packages/db/src/query/subset-dedupe.ts b/packages/db/src/query/subset-dedupe.ts index 4959d3cbb..991a5a8f3 100644 --- a/packages/db/src/query/subset-dedupe.ts +++ b/packages/db/src/query/subset-dedupe.ts @@ -129,25 +129,35 @@ export class DeduplicatedLoadSubset { // Not fully covered by existing data // Compute the subset of data that is not covered by the existing data // such that we only have to load that subset of missing data - const clonedOptions = cloneOptions(options) + // We clone options twice: + // - trackingOptions: preserves the original predicate for accurate tracking + // - loadOptions: may be modified with the difference expression for the actual request + // This separation is critical: tracking must reflect what data was REQUESTED + // (e.g., where=undefined means "all data"), not the optimized query that was sent + // to the backend (e.g., NOT(already_loaded)). Without this, a "load all" request + // would track the difference expression instead of setting hasLoadedAllData=true, + // causing unbounded expression growth on subsequent requests. + const trackingOptions = cloneOptions(options) + const loadOptions = cloneOptions(options) if (this.unlimitedWhere !== undefined && options.limit === undefined) { // Compute difference to get only the missing data // We can only do this for unlimited queries // and we can only remove data that was loaded from unlimited queries // because with limited queries we have no way to express that we already loaded part of the matching data - clonedOptions.where = - minusWherePredicates(clonedOptions.where, this.unlimitedWhere) ?? - clonedOptions.where + loadOptions.where = + minusWherePredicates(loadOptions.where, this.unlimitedWhere) ?? + loadOptions.where } // Call underlying loadSubset to load the missing data - const resultPromise = this._loadSubset(clonedOptions) + const resultPromise = this._loadSubset(loadOptions) // Handle both sync (true) and async (Promise) return values if (resultPromise === true) { // Sync return - update tracking synchronously - // Clone options before storing to protect against caller mutation - this.updateTracking(clonedOptions) + // Use trackingOptions (original predicate) so tracking accurately reflects + // what was requested, not the optimized difference query + this.updateTracking(trackingOptions) return true } else { // Async return - track the promise and update tracking after it resolves @@ -158,16 +168,16 @@ export class DeduplicatedLoadSubset { // We need to create a reference to the in-flight entry so we can remove it later const inflightEntry = { - options: clonedOptions, // Store cloned options for subset matching + options: loadOptions, // Store load options for subset matching of in-flight requests promise: resultPromise .then((result) => { // Only update tracking if this request is still from the current generation // If reset() was called, the generation will have incremented and we should // not repopulate the state that was just cleared if (capturedGeneration === this.generation) { - // Use the cloned options that we captured before any caller mutations - // This ensures we track exactly what was loaded, not what the caller changed - this.updateTracking(clonedOptions) + // Use trackingOptions (original predicate) so tracking accurately reflects + // what was requested, not the optimized difference query + this.updateTracking(trackingOptions) } return result }) diff --git a/packages/db/tests/query/subset-dedupe.test.ts b/packages/db/tests/query/subset-dedupe.test.ts index 9067c931f..60cb53937 100644 --- a/packages/db/tests/query/subset-dedupe.test.ts +++ b/packages/db/tests/query/subset-dedupe.test.ts @@ -597,6 +597,92 @@ describe(`createDeduplicatedLoadSubset`, () => { await deduplicated.loadSubset({}) expect(callCount).toBe(2) expect(calls[1]).toEqual({ where: not(gt(ref(`age`), val(20))) }) // Should request all data except what we already loaded + + // After loading all data (undefined where), subsequent calls should be deduplicated + const result = await deduplicated.loadSubset({ + where: gt(ref(`age`), val(5)), + }) + expect(result).toBe(true) // Should be covered since we loaded all data + expect(callCount).toBe(2) // No additional call needed + }) + + it(`should not produce unbounded WHERE expressions when loading all data after eq accumulation`, async () => { + // This test reproduces the production bug where navigating to many tasks + // (each adding eq(task_id, uuid) to the accumulator) and then loading all data + // (no WHERE clause) caused exponentially growing expressions instead of + // correctly setting hasLoadedAllData=true. + let callCount = 0 + const calls: Array = [] + const mockLoadSubset = (options: LoadSubsetOptions) => { + callCount++ + calls.push(cloneOptions(options)) + return Promise.resolve() + } + + const deduplicated = new DeduplicatedLoadSubset({ + loadSubset: mockLoadSubset, + }) + + // Simulate visiting multiple tasks, each adding an eq predicate + for (let i = 0; i < 10; i++) { + await deduplicated.loadSubset({ + where: eq(ref(`task_id`), val(`uuid-${i}`)), + }) + } + // After 10 eq calls, unlimitedWhere should be IN(task_id, [uuid-0, ..., uuid-9]) + expect(callCount).toBe(10) + + // Now load all data (sessions index page with no WHERE clause) + // This should send NOT(IN(...)) to the backend but track as "all data loaded" + await deduplicated.loadSubset({}) + expect(callCount).toBe(11) + + // The load request should be NOT(accumulated predicate) + expect(calls[10]!.where).toBeDefined() + expect((calls[10]!.where as any).name).toBe(`not`) + + // Critical: after loading all data, subsequent requests should be deduplicated + const result1 = await deduplicated.loadSubset({ + where: eq(ref(`task_id`), val(`uuid-999`)), + }) + expect(result1).toBe(true) // Covered by "all data" load + expect(callCount).toBe(11) // No additional call + + // Loading all data again should also be deduplicated + const result2 = await deduplicated.loadSubset({}) + expect(result2).toBe(true) + expect(callCount).toBe(11) // Still no additional call + }) + + it(`should handle multiple all-data loads without expression growth`, async () => { + // Regression test: repeated "load all" requests should not cause + // the tracked predicate to grow unboundedly + let callCount = 0 + const calls: Array = [] + const mockLoadSubset = (options: LoadSubsetOptions) => { + callCount++ + calls.push(cloneOptions(options)) + return Promise.resolve() + } + + const deduplicated = new DeduplicatedLoadSubset({ + loadSubset: mockLoadSubset, + }) + + // First: load some specific data + await deduplicated.loadSubset({ + where: eq(ref(`task_id`), val(`uuid-1`)), + }) + expect(callCount).toBe(1) + + // Load all data (first time) + await deduplicated.loadSubset({}) + expect(callCount).toBe(2) + + // Load all data (second time) - should be deduplicated since we already have everything + const result = await deduplicated.loadSubset({}) + expect(result).toBe(true) + expect(callCount).toBe(2) // No additional call - all data already loaded }) it(`should handle multiple overlapping unlimited calls`, async () => { From a4f49b068b9ac191e718b81bf7ead3a3c5eeffc0 Mon Sep 17 00:00:00 2001 From: Kyle Mathews Date: Tue, 10 Mar 2026 15:53:55 -0600 Subject: [PATCH 2/4] Apply code review and simplification fixes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Condense block comment explaining trackingOptions/loadOptions split - Remove duplicate inline comments at updateTracking call sites - Remove stray console.log and unused minusWherePredicates import - Fix "exponentially" → "unboundedly" in test comment (growth is linear) - Remove product-specific "sessions index page" reference from test - Strengthen expression assertion to verify full NOT(IN(...)) structure - Add sync return path test for the tracking fix - Simplify test by removing unused calls array Co-Authored-By: Claude Opus 4.6 --- packages/db/src/query/subset-dedupe.ts | 21 ++---- packages/db/tests/query/subset-dedupe.test.ts | 75 +++++++++++++------ 2 files changed, 59 insertions(+), 37 deletions(-) diff --git a/packages/db/src/query/subset-dedupe.ts b/packages/db/src/query/subset-dedupe.ts index 991a5a8f3..87d703b74 100644 --- a/packages/db/src/query/subset-dedupe.ts +++ b/packages/db/src/query/subset-dedupe.ts @@ -126,17 +126,10 @@ export class DeduplicatedLoadSubset { return prom } - // Not fully covered by existing data - // Compute the subset of data that is not covered by the existing data - // such that we only have to load that subset of missing data - // We clone options twice: - // - trackingOptions: preserves the original predicate for accurate tracking - // - loadOptions: may be modified with the difference expression for the actual request - // This separation is critical: tracking must reflect what data was REQUESTED - // (e.g., where=undefined means "all data"), not the optimized query that was sent - // to the backend (e.g., NOT(already_loaded)). Without this, a "load all" request - // would track the difference expression instead of setting hasLoadedAllData=true, - // causing unbounded expression growth on subsequent requests. + // Not fully covered by existing data — load the missing subset. + // We need two clones: trackingOptions preserves the original predicate for + // accurate tracking (e.g., where=undefined means "all data"), while loadOptions + // may be narrowed with a difference expression for the actual backend request. const trackingOptions = cloneOptions(options) const loadOptions = cloneOptions(options) if (this.unlimitedWhere !== undefined && options.limit === undefined) { @@ -154,9 +147,7 @@ export class DeduplicatedLoadSubset { // Handle both sync (true) and async (Promise) return values if (resultPromise === true) { - // Sync return - update tracking synchronously - // Use trackingOptions (original predicate) so tracking accurately reflects - // what was requested, not the optimized difference query + // Sync return - update tracking with the original predicate this.updateTracking(trackingOptions) return true } else { @@ -175,8 +166,6 @@ export class DeduplicatedLoadSubset { // If reset() was called, the generation will have incremented and we should // not repopulate the state that was just cleared if (capturedGeneration === this.generation) { - // Use trackingOptions (original predicate) so tracking accurately reflects - // what was requested, not the optimized difference query this.updateTracking(trackingOptions) } return result diff --git a/packages/db/tests/query/subset-dedupe.test.ts b/packages/db/tests/query/subset-dedupe.test.ts index 60cb53937..16ae3287d 100644 --- a/packages/db/tests/query/subset-dedupe.test.ts +++ b/packages/db/tests/query/subset-dedupe.test.ts @@ -4,7 +4,6 @@ import { cloneOptions, } from '../../src/query/subset-dedupe' import { Func, PropRef, Value } from '../../src/query/ir' -import { minusWherePredicates } from '../../src/query/predicate-utils' import type { BasicExpression, OrderBy } from '../../src/query/ir' import type { LoadSubsetOptions } from '../../src/types' @@ -517,9 +516,6 @@ describe(`createDeduplicatedLoadSubset`, () => { eq(ref(`status`), val(`active`)), ) - const test = minusWherePredicates(secondPredicate, firstPredicate) - console.log(`test`, test) - await deduplicated.loadSubset({ where: secondPredicate }) expect(callCount).toBe(2) expect(calls[1]).toEqual({ @@ -596,21 +592,20 @@ describe(`createDeduplicatedLoadSubset`, () => { // i.e. should request NOT (age > 20) await deduplicated.loadSubset({}) expect(callCount).toBe(2) - expect(calls[1]).toEqual({ where: not(gt(ref(`age`), val(20))) }) // Should request all data except what we already loaded + expect(calls[1]).toEqual({ where: not(gt(ref(`age`), val(20))) }) - // After loading all data (undefined where), subsequent calls should be deduplicated + // After loading all data, subsequent calls should be deduplicated const result = await deduplicated.loadSubset({ where: gt(ref(`age`), val(5)), }) - expect(result).toBe(true) // Should be covered since we loaded all data - expect(callCount).toBe(2) // No additional call needed + expect(result).toBe(true) + expect(callCount).toBe(2) }) it(`should not produce unbounded WHERE expressions when loading all data after eq accumulation`, async () => { - // This test reproduces the production bug where navigating to many tasks - // (each adding eq(task_id, uuid) to the accumulator) and then loading all data - // (no WHERE clause) caused exponentially growing expressions instead of - // correctly setting hasLoadedAllData=true. + // This test reproduces the production bug where accumulating many eq predicates + // and then loading all data (no WHERE clause) caused unboundedly growing + // expressions instead of correctly setting hasLoadedAllData=true. let callCount = 0 const calls: Array = [] const mockLoadSubset = (options: LoadSubsetOptions) => { @@ -632,14 +627,19 @@ describe(`createDeduplicatedLoadSubset`, () => { // After 10 eq calls, unlimitedWhere should be IN(task_id, [uuid-0, ..., uuid-9]) expect(callCount).toBe(10) - // Now load all data (sessions index page with no WHERE clause) + // Now load all data (no WHERE clause) // This should send NOT(IN(...)) to the backend but track as "all data loaded" await deduplicated.loadSubset({}) expect(callCount).toBe(11) - // The load request should be NOT(accumulated predicate) - expect(calls[10]!.where).toBeDefined() - expect((calls[10]!.where as any).name).toBe(`not`) + // The load request should be NOT(IN(task_id, [all accumulated uuids])) + const loadWhere = calls[10]!.where as any + expect(loadWhere.name).toBe(`not`) + expect(loadWhere.args[0].name).toBe(`in`) + expect(loadWhere.args[0].args[0].path).toEqual([`task_id`]) + const loadedUuids = (loadWhere.args[0].args[1].value as Array).sort() + const expectedUuids = Array.from({ length: 10 }, (_, i) => `uuid-${i}`).sort() + expect(loadedUuids).toEqual(expectedUuids) // Critical: after loading all data, subsequent requests should be deduplicated const result1 = await deduplicated.loadSubset({ @@ -654,14 +654,47 @@ describe(`createDeduplicatedLoadSubset`, () => { expect(callCount).toBe(11) // Still no additional call }) + it(`should not produce unbounded WHERE expressions with synchronous loadSubset`, async () => { + // Same scenario as the async accumulation test, but with a sync mock + // to exercise the sync return path (line 150 of subset-dedupe.ts) + let callCount = 0 + const mockLoadSubset = () => { + callCount++ + return true as const + } + + const deduplicated = new DeduplicatedLoadSubset({ + loadSubset: mockLoadSubset, + }) + + // Accumulate eq predicates via sync returns + for (let i = 0; i < 10; i++) { + deduplicated.loadSubset({ + where: eq(ref(`task_id`), val(`uuid-${i}`)), + }) + } + expect(callCount).toBe(10) + + // Load all data (no WHERE clause) — should track as "all data loaded" + deduplicated.loadSubset({}) + expect(callCount).toBe(11) + + // Subsequent requests should be deduplicated + const result1 = deduplicated.loadSubset({ + where: eq(ref(`task_id`), val(`uuid-999`)), + }) + expect(result1).toBe(true) + expect(callCount).toBe(11) + + const result2 = deduplicated.loadSubset({}) + expect(result2).toBe(true) + expect(callCount).toBe(11) + }) + it(`should handle multiple all-data loads without expression growth`, async () => { - // Regression test: repeated "load all" requests should not cause - // the tracked predicate to grow unboundedly let callCount = 0 - const calls: Array = [] - const mockLoadSubset = (options: LoadSubsetOptions) => { + const mockLoadSubset = () => { callCount++ - calls.push(cloneOptions(options)) return Promise.resolve() } From 77750e7836deb9d34af73aff46d2664a72ea84e3 Mon Sep 17 00:00:00 2001 From: Kyle Mathews Date: Tue, 10 Mar 2026 15:56:26 -0600 Subject: [PATCH 3/4] Add changeset for unbounded WHERE growth fix Co-Authored-By: Claude Opus 4.6 --- .changeset/fix-unbounded-where-growth.md | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 .changeset/fix-unbounded-where-growth.md diff --git a/.changeset/fix-unbounded-where-growth.md b/.changeset/fix-unbounded-where-growth.md new file mode 100644 index 000000000..777f0b5f2 --- /dev/null +++ b/.changeset/fix-unbounded-where-growth.md @@ -0,0 +1,5 @@ +--- +'@tanstack/db': patch +--- + +Fix unbounded WHERE expression growth in `DeduplicatedLoadSubset` when loading all data after accumulating specific predicates. The deduplication layer now correctly tracks the original request predicate (e.g., `where: undefined` for "load all") instead of the optimized difference query sent to the backend, ensuring `hasLoadedAllData` is properly set and subsequent requests are deduplicated. From 3a1731954bd31038499f2fc48ec383a4cc271391 Mon Sep 17 00:00:00 2001 From: "autofix-ci[bot]" <114827586+autofix-ci[bot]@users.noreply.github.com> Date: Tue, 10 Mar 2026 21:57:18 +0000 Subject: [PATCH 4/4] ci: apply automated fixes --- packages/db/tests/query/subset-dedupe.test.ts | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/packages/db/tests/query/subset-dedupe.test.ts b/packages/db/tests/query/subset-dedupe.test.ts index 16ae3287d..3be135ed5 100644 --- a/packages/db/tests/query/subset-dedupe.test.ts +++ b/packages/db/tests/query/subset-dedupe.test.ts @@ -637,8 +637,13 @@ describe(`createDeduplicatedLoadSubset`, () => { expect(loadWhere.name).toBe(`not`) expect(loadWhere.args[0].name).toBe(`in`) expect(loadWhere.args[0].args[0].path).toEqual([`task_id`]) - const loadedUuids = (loadWhere.args[0].args[1].value as Array).sort() - const expectedUuids = Array.from({ length: 10 }, (_, i) => `uuid-${i}`).sort() + const loadedUuids = ( + loadWhere.args[0].args[1].value as Array + ).sort() + const expectedUuids = Array.from( + { length: 10 }, + (_, i) => `uuid-${i}`, + ).sort() expect(loadedUuids).toEqual(expectedUuids) // Critical: after loading all data, subsequent requests should be deduplicated