diff --git a/.changeset/fix-unbounded-where-growth.md b/.changeset/fix-unbounded-where-growth.md new file mode 100644 index 000000000..777f0b5f2 --- /dev/null +++ b/.changeset/fix-unbounded-where-growth.md @@ -0,0 +1,5 @@ +--- +'@tanstack/db': patch +--- + +Fix unbounded WHERE expression growth in `DeduplicatedLoadSubset` when loading all data after accumulating specific predicates. The deduplication layer now correctly tracks the original request predicate (e.g., `where: undefined` for "load all") instead of the optimized difference query sent to the backend, ensuring `hasLoadedAllData` is properly set and subsequent requests are deduplicated. diff --git a/packages/db/src/query/subset-dedupe.ts b/packages/db/src/query/subset-dedupe.ts index 4959d3cbb..87d703b74 100644 --- a/packages/db/src/query/subset-dedupe.ts +++ b/packages/db/src/query/subset-dedupe.ts @@ -126,28 +126,29 @@ export class DeduplicatedLoadSubset { return prom } - // Not fully covered by existing data - // Compute the subset of data that is not covered by the existing data - // such that we only have to load that subset of missing data - const clonedOptions = cloneOptions(options) + // Not fully covered by existing data — load the missing subset. + // We need two clones: trackingOptions preserves the original predicate for + // accurate tracking (e.g., where=undefined means "all data"), while loadOptions + // may be narrowed with a difference expression for the actual backend request. + const trackingOptions = cloneOptions(options) + const loadOptions = cloneOptions(options) if (this.unlimitedWhere !== undefined && options.limit === undefined) { // Compute difference to get only the missing data // We can only do this for unlimited queries // and we can only remove data that was loaded from unlimited queries // because with limited queries we have no way to express that we already loaded part of the matching data - clonedOptions.where = - minusWherePredicates(clonedOptions.where, this.unlimitedWhere) ?? - clonedOptions.where + loadOptions.where = + minusWherePredicates(loadOptions.where, this.unlimitedWhere) ?? + loadOptions.where } // Call underlying loadSubset to load the missing data - const resultPromise = this._loadSubset(clonedOptions) + const resultPromise = this._loadSubset(loadOptions) // Handle both sync (true) and async (Promise) return values if (resultPromise === true) { - // Sync return - update tracking synchronously - // Clone options before storing to protect against caller mutation - this.updateTracking(clonedOptions) + // Sync return - update tracking with the original predicate + this.updateTracking(trackingOptions) return true } else { // Async return - track the promise and update tracking after it resolves @@ -158,16 +159,14 @@ export class DeduplicatedLoadSubset { // We need to create a reference to the in-flight entry so we can remove it later const inflightEntry = { - options: clonedOptions, // Store cloned options for subset matching + options: loadOptions, // Store load options for subset matching of in-flight requests promise: resultPromise .then((result) => { // Only update tracking if this request is still from the current generation // If reset() was called, the generation will have incremented and we should // not repopulate the state that was just cleared if (capturedGeneration === this.generation) { - // Use the cloned options that we captured before any caller mutations - // This ensures we track exactly what was loaded, not what the caller changed - this.updateTracking(clonedOptions) + this.updateTracking(trackingOptions) } return result }) diff --git a/packages/db/tests/query/subset-dedupe.test.ts b/packages/db/tests/query/subset-dedupe.test.ts index 9067c931f..3be135ed5 100644 --- a/packages/db/tests/query/subset-dedupe.test.ts +++ b/packages/db/tests/query/subset-dedupe.test.ts @@ -4,7 +4,6 @@ import { cloneOptions, } from '../../src/query/subset-dedupe' import { Func, PropRef, Value } from '../../src/query/ir' -import { minusWherePredicates } from '../../src/query/predicate-utils' import type { BasicExpression, OrderBy } from '../../src/query/ir' import type { LoadSubsetOptions } from '../../src/types' @@ -517,9 +516,6 @@ describe(`createDeduplicatedLoadSubset`, () => { eq(ref(`status`), val(`active`)), ) - const test = minusWherePredicates(secondPredicate, firstPredicate) - console.log(`test`, test) - await deduplicated.loadSubset({ where: secondPredicate }) expect(callCount).toBe(2) expect(calls[1]).toEqual({ @@ -596,7 +592,135 @@ describe(`createDeduplicatedLoadSubset`, () => { // i.e. should request NOT (age > 20) await deduplicated.loadSubset({}) expect(callCount).toBe(2) - expect(calls[1]).toEqual({ where: not(gt(ref(`age`), val(20))) }) // Should request all data except what we already loaded + expect(calls[1]).toEqual({ where: not(gt(ref(`age`), val(20))) }) + + // After loading all data, subsequent calls should be deduplicated + const result = await deduplicated.loadSubset({ + where: gt(ref(`age`), val(5)), + }) + expect(result).toBe(true) + expect(callCount).toBe(2) + }) + + it(`should not produce unbounded WHERE expressions when loading all data after eq accumulation`, async () => { + // This test reproduces the production bug where accumulating many eq predicates + // and then loading all data (no WHERE clause) caused unboundedly growing + // expressions instead of correctly setting hasLoadedAllData=true. + let callCount = 0 + const calls: Array = [] + const mockLoadSubset = (options: LoadSubsetOptions) => { + callCount++ + calls.push(cloneOptions(options)) + return Promise.resolve() + } + + const deduplicated = new DeduplicatedLoadSubset({ + loadSubset: mockLoadSubset, + }) + + // Simulate visiting multiple tasks, each adding an eq predicate + for (let i = 0; i < 10; i++) { + await deduplicated.loadSubset({ + where: eq(ref(`task_id`), val(`uuid-${i}`)), + }) + } + // After 10 eq calls, unlimitedWhere should be IN(task_id, [uuid-0, ..., uuid-9]) + expect(callCount).toBe(10) + + // Now load all data (no WHERE clause) + // This should send NOT(IN(...)) to the backend but track as "all data loaded" + await deduplicated.loadSubset({}) + expect(callCount).toBe(11) + + // The load request should be NOT(IN(task_id, [all accumulated uuids])) + const loadWhere = calls[10]!.where as any + expect(loadWhere.name).toBe(`not`) + expect(loadWhere.args[0].name).toBe(`in`) + expect(loadWhere.args[0].args[0].path).toEqual([`task_id`]) + const loadedUuids = ( + loadWhere.args[0].args[1].value as Array + ).sort() + const expectedUuids = Array.from( + { length: 10 }, + (_, i) => `uuid-${i}`, + ).sort() + expect(loadedUuids).toEqual(expectedUuids) + + // Critical: after loading all data, subsequent requests should be deduplicated + const result1 = await deduplicated.loadSubset({ + where: eq(ref(`task_id`), val(`uuid-999`)), + }) + expect(result1).toBe(true) // Covered by "all data" load + expect(callCount).toBe(11) // No additional call + + // Loading all data again should also be deduplicated + const result2 = await deduplicated.loadSubset({}) + expect(result2).toBe(true) + expect(callCount).toBe(11) // Still no additional call + }) + + it(`should not produce unbounded WHERE expressions with synchronous loadSubset`, async () => { + // Same scenario as the async accumulation test, but with a sync mock + // to exercise the sync return path (line 150 of subset-dedupe.ts) + let callCount = 0 + const mockLoadSubset = () => { + callCount++ + return true as const + } + + const deduplicated = new DeduplicatedLoadSubset({ + loadSubset: mockLoadSubset, + }) + + // Accumulate eq predicates via sync returns + for (let i = 0; i < 10; i++) { + deduplicated.loadSubset({ + where: eq(ref(`task_id`), val(`uuid-${i}`)), + }) + } + expect(callCount).toBe(10) + + // Load all data (no WHERE clause) — should track as "all data loaded" + deduplicated.loadSubset({}) + expect(callCount).toBe(11) + + // Subsequent requests should be deduplicated + const result1 = deduplicated.loadSubset({ + where: eq(ref(`task_id`), val(`uuid-999`)), + }) + expect(result1).toBe(true) + expect(callCount).toBe(11) + + const result2 = deduplicated.loadSubset({}) + expect(result2).toBe(true) + expect(callCount).toBe(11) + }) + + it(`should handle multiple all-data loads without expression growth`, async () => { + let callCount = 0 + const mockLoadSubset = () => { + callCount++ + return Promise.resolve() + } + + const deduplicated = new DeduplicatedLoadSubset({ + loadSubset: mockLoadSubset, + }) + + // First: load some specific data + await deduplicated.loadSubset({ + where: eq(ref(`task_id`), val(`uuid-1`)), + }) + expect(callCount).toBe(1) + + // Load all data (first time) + await deduplicated.loadSubset({}) + expect(callCount).toBe(2) + + // Load all data (second time) - should be deduplicated since we already have everything + const result = await deduplicated.loadSubset({}) + expect(result).toBe(true) + expect(callCount).toBe(2) // No additional call - all data already loaded }) it(`should handle multiple overlapping unlimited calls`, async () => {