Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/fix-unbounded-where-growth.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
'@tanstack/db': patch
---

Fix unbounded WHERE expression growth in `DeduplicatedLoadSubset` when loading all data after accumulating specific predicates. The deduplication layer now correctly tracks the original request predicate (e.g., `where: undefined` for "load all") instead of the optimized difference query sent to the backend, ensuring `hasLoadedAllData` is properly set and subsequent requests are deduplicated.
29 changes: 14 additions & 15 deletions packages/db/src/query/subset-dedupe.ts
Original file line number Diff line number Diff line change
Expand Up @@ -126,28 +126,29 @@ export class DeduplicatedLoadSubset {
return prom
}

// Not fully covered by existing data
// Compute the subset of data that is not covered by the existing data
// such that we only have to load that subset of missing data
const clonedOptions = cloneOptions(options)
// Not fully covered by existing data — load the missing subset.
// We need two clones: trackingOptions preserves the original predicate for
// accurate tracking (e.g., where=undefined means "all data"), while loadOptions
// may be narrowed with a difference expression for the actual backend request.
const trackingOptions = cloneOptions(options)
const loadOptions = cloneOptions(options)
if (this.unlimitedWhere !== undefined && options.limit === undefined) {
// Compute difference to get only the missing data
// We can only do this for unlimited queries
// and we can only remove data that was loaded from unlimited queries
// because with limited queries we have no way to express that we already loaded part of the matching data
clonedOptions.where =
minusWherePredicates(clonedOptions.where, this.unlimitedWhere) ??
clonedOptions.where
loadOptions.where =
minusWherePredicates(loadOptions.where, this.unlimitedWhere) ??
loadOptions.where
}

// Call underlying loadSubset to load the missing data
const resultPromise = this._loadSubset(clonedOptions)
const resultPromise = this._loadSubset(loadOptions)

// Handle both sync (true) and async (Promise<void>) return values
if (resultPromise === true) {
// Sync return - update tracking synchronously
// Clone options before storing to protect against caller mutation
this.updateTracking(clonedOptions)
// Sync return - update tracking with the original predicate
this.updateTracking(trackingOptions)
return true
} else {
// Async return - track the promise and update tracking after it resolves
Expand All @@ -158,16 +159,14 @@ export class DeduplicatedLoadSubset {

// We need to create a reference to the in-flight entry so we can remove it later
const inflightEntry = {
options: clonedOptions, // Store cloned options for subset matching
options: loadOptions, // Store load options for subset matching of in-flight requests
promise: resultPromise
.then((result) => {
// Only update tracking if this request is still from the current generation
// If reset() was called, the generation will have incremented and we should
// not repopulate the state that was just cleared
if (capturedGeneration === this.generation) {
// Use the cloned options that we captured before any caller mutations
// This ensures we track exactly what was loaded, not what the caller changed
this.updateTracking(clonedOptions)
this.updateTracking(trackingOptions)
}
return result
})
Expand Down
134 changes: 129 additions & 5 deletions packages/db/tests/query/subset-dedupe.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ import {
cloneOptions,
} from '../../src/query/subset-dedupe'
import { Func, PropRef, Value } from '../../src/query/ir'
import { minusWherePredicates } from '../../src/query/predicate-utils'
import type { BasicExpression, OrderBy } from '../../src/query/ir'
import type { LoadSubsetOptions } from '../../src/types'

Expand Down Expand Up @@ -517,9 +516,6 @@ describe(`createDeduplicatedLoadSubset`, () => {
eq(ref(`status`), val(`active`)),
)

const test = minusWherePredicates(secondPredicate, firstPredicate)
console.log(`test`, test)

await deduplicated.loadSubset({ where: secondPredicate })
expect(callCount).toBe(2)
expect(calls[1]).toEqual({
Expand Down Expand Up @@ -596,7 +592,135 @@ describe(`createDeduplicatedLoadSubset`, () => {
// i.e. should request NOT (age > 20)
await deduplicated.loadSubset({})
expect(callCount).toBe(2)
expect(calls[1]).toEqual({ where: not(gt(ref(`age`), val(20))) }) // Should request all data except what we already loaded
expect(calls[1]).toEqual({ where: not(gt(ref(`age`), val(20))) })

// After loading all data, subsequent calls should be deduplicated
const result = await deduplicated.loadSubset({
where: gt(ref(`age`), val(5)),
})
expect(result).toBe(true)
expect(callCount).toBe(2)
})

it(`should not produce unbounded WHERE expressions when loading all data after eq accumulation`, async () => {
// This test reproduces the production bug where accumulating many eq predicates
// and then loading all data (no WHERE clause) caused unboundedly growing
// expressions instead of correctly setting hasLoadedAllData=true.
let callCount = 0
const calls: Array<LoadSubsetOptions> = []
const mockLoadSubset = (options: LoadSubsetOptions) => {
callCount++
calls.push(cloneOptions(options))
return Promise.resolve()
}

const deduplicated = new DeduplicatedLoadSubset({
loadSubset: mockLoadSubset,
})

// Simulate visiting multiple tasks, each adding an eq predicate
for (let i = 0; i < 10; i++) {
await deduplicated.loadSubset({
where: eq(ref(`task_id`), val(`uuid-${i}`)),
})
}
// After 10 eq calls, unlimitedWhere should be IN(task_id, [uuid-0, ..., uuid-9])
expect(callCount).toBe(10)

// Now load all data (no WHERE clause)
// This should send NOT(IN(...)) to the backend but track as "all data loaded"
await deduplicated.loadSubset({})
expect(callCount).toBe(11)

// The load request should be NOT(IN(task_id, [all accumulated uuids]))
const loadWhere = calls[10]!.where as any
expect(loadWhere.name).toBe(`not`)
expect(loadWhere.args[0].name).toBe(`in`)
expect(loadWhere.args[0].args[0].path).toEqual([`task_id`])
const loadedUuids = (
loadWhere.args[0].args[1].value as Array<string>
).sort()
const expectedUuids = Array.from(
{ length: 10 },
(_, i) => `uuid-${i}`,
).sort()
expect(loadedUuids).toEqual(expectedUuids)

// Critical: after loading all data, subsequent requests should be deduplicated
const result1 = await deduplicated.loadSubset({
where: eq(ref(`task_id`), val(`uuid-999`)),
})
expect(result1).toBe(true) // Covered by "all data" load
expect(callCount).toBe(11) // No additional call

// Loading all data again should also be deduplicated
const result2 = await deduplicated.loadSubset({})
expect(result2).toBe(true)
expect(callCount).toBe(11) // Still no additional call
})

it(`should not produce unbounded WHERE expressions with synchronous loadSubset`, async () => {
// Same scenario as the async accumulation test, but with a sync mock
// to exercise the sync return path (line 150 of subset-dedupe.ts)
let callCount = 0
const mockLoadSubset = () => {
callCount++
return true as const
}

const deduplicated = new DeduplicatedLoadSubset({
loadSubset: mockLoadSubset,
})

// Accumulate eq predicates via sync returns
for (let i = 0; i < 10; i++) {
deduplicated.loadSubset({
where: eq(ref(`task_id`), val(`uuid-${i}`)),
})
}
expect(callCount).toBe(10)

// Load all data (no WHERE clause) — should track as "all data loaded"
deduplicated.loadSubset({})
expect(callCount).toBe(11)

// Subsequent requests should be deduplicated
const result1 = deduplicated.loadSubset({
where: eq(ref(`task_id`), val(`uuid-999`)),
})
expect(result1).toBe(true)
expect(callCount).toBe(11)

const result2 = deduplicated.loadSubset({})
expect(result2).toBe(true)
expect(callCount).toBe(11)
})

it(`should handle multiple all-data loads without expression growth`, async () => {
let callCount = 0
const mockLoadSubset = () => {
callCount++
return Promise.resolve()
}

const deduplicated = new DeduplicatedLoadSubset({
loadSubset: mockLoadSubset,
})

// First: load some specific data
await deduplicated.loadSubset({
where: eq(ref(`task_id`), val(`uuid-1`)),
})
expect(callCount).toBe(1)

// Load all data (first time)
await deduplicated.loadSubset({})
expect(callCount).toBe(2)

// Load all data (second time) - should be deduplicated since we already have everything
const result = await deduplicated.loadSubset({})
expect(result).toBe(true)
expect(callCount).toBe(2) // No additional call - all data already loaded
})

it(`should handle multiple overlapping unlimited calls`, async () => {
Expand Down
Loading