From 4f1dcbcfff7af35b2a41309698aaaad89a78f7e4 Mon Sep 17 00:00:00 2001 From: Sam Willis Date: Sat, 13 Sep 2025 17:35:24 +0100 Subject: [PATCH 1/5] impliment prefixed index to remove hashing from joins initial load --- packages/db-ivm/src/hashIndex.ts | 94 --------- packages/db-ivm/src/indexes.ts | 318 +++++++++++++++++++++--------- packages/db-ivm/src/valueIndex.ts | 78 -------- 3 files changed, 220 insertions(+), 270 deletions(-) delete mode 100644 packages/db-ivm/src/hashIndex.ts delete mode 100644 packages/db-ivm/src/valueIndex.ts diff --git a/packages/db-ivm/src/hashIndex.ts b/packages/db-ivm/src/hashIndex.ts deleted file mode 100644 index cc9df1b0c..000000000 --- a/packages/db-ivm/src/hashIndex.ts +++ /dev/null @@ -1,94 +0,0 @@ -import { DefaultMap } from "./utils.js" -import { hash } from "./hashing/index.js" -import type { Hash } from "./hashing/index.js" - -/** - * A map from a difference collection trace's keys -> (value, multiplicities) that changed. - * Used in operations like join and reduce where the operation needs to - * exploit the key-value structure of the data to run efficiently. - */ -export class HashIndex { - #inner: DefaultMap> - - constructor() { - this.#inner = new DefaultMap>( - () => new DefaultMap(() => [undefined as any as V, 0]) - ) - // #inner is as map of: - // { - // [key]: { - // [hash(value)]: [value, multiplicity] - // } - // } - } - - toString(indent = false): string { - return `HashIndex(${JSON.stringify( - [...this.#inner].map(([k, valueMap]) => [k, [...valueMap]]), - undefined, - indent ? 2 : undefined - )})` - } - - get(key: K): Array<[V, number]> { - const valueMap = this.#inner.get(key) - return [...valueMap.values()] - } - - getMultiplicity(key: K, value: V): number { - const valueMap = this.#inner.get(key) - const valueHash = hash(value) - const [, multiplicity] = valueMap.get(valueHash) - return multiplicity - } - - entries() { - return this.#inner.entries() - } - - *entriesIterator(): Generator<[K, [V, number]]> { - for (const [key, valueMap] of this.#inner.entries()) { - for (const [_valueHash, [value, multiplicity]] of valueMap.entries()) { - yield [key, [value, multiplicity]] - } - } - } - - has(key: K): boolean { - return this.#inner.has(key) - } - - delete(key: K): void { - this.#inner.delete(key) - } - - get size(): number { - return this.#inner.size - } - - /** - * Adds a value to the index and does not return anything - * except if the addition caused the value to be removed - * and the key to be left with only a single value. - * In that case, we return the single remaining value. - */ - addValue(key: K, value: [V, number]): [V, number] | void { - const [val, multiplicity] = value - const valueMap = this.#inner.get(key) - const valueHash = hash(val) - const [, existingMultiplicity] = valueMap.get(valueHash) - const newMultiplicity = existingMultiplicity + multiplicity - if (multiplicity !== 0) { - if (newMultiplicity === 0) { - valueMap.delete(valueHash) - if (valueMap.size === 1) { - // Signal that the key only has a single remaining value - return valueMap.entries().next().value![1] - } - } else { - valueMap.set(valueHash, [val, newMultiplicity]) - } - } - this.#inner.set(key, valueMap) - } -} diff --git a/packages/db-ivm/src/indexes.ts b/packages/db-ivm/src/indexes.ts index 27131fc29..16be2a57b 100644 --- a/packages/db-ivm/src/indexes.ts +++ b/packages/db-ivm/src/indexes.ts @@ -1,60 +1,77 @@ import { MultiSet } from "./multiset.js" -import { HashIndex } from "./hashIndex.js" -import { ValueIndex } from "./valueIndex.js" -import { concatIterable, mapIterable } from "./utils.js" - -/** - * A map from a difference collection trace's keys -> (value, multiplicities) that changed. - * Used in operations like join and reduce where the operation needs to - * exploit the key-value structure of the data to run efficiently. - */ -export class Index { - /* - * This is a hybrid Index that composes a ValueIndex and a HashIndex. - * Keys that have only one value are stored in the ValueIndex. - * Keys that have multiple values are stored in the HashIndex, the hash distinguishes between the values. - * This reduces the amount of hashes we need to compute since often times only a small portion of the keys are updated - * so we don't have to hash the keys that are never updated. - * - * Note: The `valueIndex` and `hashIndex` have disjoint keys. - * When a key that has only one value gets a new distinct value, - * it is added to the `hashIndex` and removed from the `valueIndex` and vice versa. - */ - #valueIndex: ValueIndex - #hashIndex: HashIndex +import { hash } from "./hashing/index.js" + +const NO_PREFIX = Symbol(`NO_PREFIX`) +type NO_PREFIX = typeof NO_PREFIX + +type Hash = number +type SingleValue = [TValue, number] +type IndexMap = Map< + TKey, + SingleValue | PrefixMap +> +type PrefixMap = Map< + TPrefix | NO_PREFIX, + SingleValue | ValueMap +> +type ValueMap = Map + +export class Index { + #inner: IndexMap constructor() { - this.#valueIndex = new ValueIndex() - this.#hashIndex = new HashIndex() + this.#inner = new Map() } toString(indent = false): string { - return `Index(\n ${this.#valueIndex.toString(indent)},\n ${this.#hashIndex.toString(indent)}\n)` + return `Index(${JSON.stringify( + [...this.entries()], + undefined, + indent ? 2 : undefined + )})` } - get(key: K): Array<[V, number]> { - if (this.#valueIndex.has(key)) { - return [this.#valueIndex.get(key)!] - } - return this.#hashIndex.get(key) + get size(): number { + return this.#inner.size } - getMultiplicity(key: K, value: V): number { - if (this.#valueIndex.has(key)) { - return this.#valueIndex.getMultiplicity(key) + has(key: TKey): boolean { + return this.#inner.has(key) + } + + get(key: TKey): Array<[TValue, number]> { + return [...this.getIterator(key)] + } + + *getIterator(key: TKey): Iterable<[TValue, number]> { + const prefixMapOrSingleValue = this.#inner.get(key) + if (isSingleValue(prefixMapOrSingleValue)) { + yield prefixMapOrSingleValue + } else if (prefixMapOrSingleValue === undefined) { + return + } else { + for (const singleValueOrValueMap of prefixMapOrSingleValue.values()) { + if (isSingleValue(singleValueOrValueMap)) { + yield singleValueOrValueMap + } else { + for (const valueTuple of singleValueOrValueMap.values()) { + yield valueTuple + } + } + } } - return this.#hashIndex.getMultiplicity(key, value) } /** * This returns an iterator that iterates over all key-value pairs. * @returns An iterable of all key-value pairs (and their multiplicities) in the index. */ - #entries(): Iterable<[K, [V, number]]> { - return concatIterable( - this.#valueIndex.entries(), - this.#hashIndex.entriesIterator() - ) + *entries(): Iterable<[TKey, [TValue, number]]> { + for (const key of this.#inner.keys()) { + for (const valueTuple of this.getIterator(key)) { + yield [key, valueTuple] + } + } } /** @@ -63,86 +80,168 @@ export class Index { * It returns an iterator that you can use if you need to iterate over the values for a given key. * @returns An iterator of all *keys* in the index and their corresponding value iterator. */ - *#entriesIterators(): Iterable<[K, Iterable<[V, number]>]> { - for (const [key, [value, multiplicity]] of this.#valueIndex.entries()) { - yield [key, new Map([[value, multiplicity]])] - } - for (const [key, valueMap] of this.#hashIndex.entries()) { - yield [ - key, - mapIterable(valueMap, ([_hash, [value, multiplicity]]) => [ - value, - multiplicity, - ]), - ] + *#entriesIterators(): Iterable<[TKey, Iterable<[TValue, number]>]> { + for (const key of this.#inner.keys()) { + yield [key, this.getIterator(key)] } } - has(key: K): boolean { - return this.#valueIndex.has(key) || this.#hashIndex.has(key) - } + addValue(key: TKey, valueTuple: SingleValue) { + const [value, multiplicity] = valueTuple + // If the multiplicity is 0, do nothing + if (multiplicity === 0) return - get size(): number { - return this.#valueIndex.size + this.#hashIndex.size - } + const prefixMapOrSingleValue = this.#inner.get(key) - addValue(key: K, value: [V, number]): void { - const containedInValueIndex = this.#valueIndex.has(key) - const containedInHashIndex = this.#hashIndex.has(key) + if (prefixMapOrSingleValue === undefined) { + // This is the first time we see a value for this key we just insert it + // into the index as a single value tuple + this.#inner.set(key, valueTuple) + return + } - if (containedInHashIndex && containedInValueIndex) { - throw new Error( - `Key ${key} is contained in both the value index and the hash index. This should never happen because they should have disjoint keysets.` - ) + const [currentSingleValueForKey, prefixMap] = isSingleValue( + prefixMapOrSingleValue + ) + ? [prefixMapOrSingleValue, undefined] + : [undefined, prefixMapOrSingleValue] + + if (currentSingleValueForKey) { + const [currentValue, currentMultiplicity] = currentSingleValueForKey + // We have a single value for this key, lets check if this is the same value + // and if so we just update the multiplicity. This is a check if its the same + // literal value or object reference. + if (currentValue === value) { + const newMultiplicity = currentMultiplicity + multiplicity + if (newMultiplicity === 0) { + this.#inner.delete(key) + } else { + this.#inner.set(key, [value, newMultiplicity]) + } + return + } } - if (!containedInValueIndex && !containedInHashIndex) { - // This is the first time we see the key - // Add it to the value index - this.#valueIndex.addValue(key, value) - return + // Get the prefix of the new value + const [prefix, suffix] = getPrefix(value) + + if (currentSingleValueForKey) { + const [currentValue, currentMultiplicity] = currentSingleValueForKey + const [currentPrefix, currentSuffix] = getPrefix( + currentValue + ) + if ( + currentPrefix === prefix && + (currentSuffix === suffix || hash(currentSuffix) === hash(suffix)) + ) { + // They are the same value, so we just update the multiplicity + const newMultiplicity = currentMultiplicity + multiplicity + if (newMultiplicity === 0) { + this.#inner.delete(key) + } else { + this.#inner.set(key, [value, newMultiplicity]) + } + return + } else { + // They are different values, so we need to move the current value to a + // new prefix map + const newPrefixMap = new Map< + TPrefix | NO_PREFIX, + SingleValue | ValueMap + >() + this.#inner.set(key, newPrefixMap) + + if (currentPrefix === prefix) { + // They have the same prefix but different suffixes, so we need to add a + // value map for this suffix to the prefix map + const valueMap = new Map() + valueMap.set(hash(currentSuffix), currentSingleValueForKey) + valueMap.set(hash(suffix), valueTuple) + newPrefixMap.set(currentPrefix, valueMap) + } else { + // They have different prefixes, so we can add then as singe values to the + // prefix map + newPrefixMap.set(currentPrefix, currentSingleValueForKey) + newPrefixMap.set(prefix, valueTuple) + } + return + } } - if (containedInValueIndex) { - // This key is already in the value index - // It could be that it's the same value or a different one - // If it's a different value we will need to remove the key from the value index - // and add the key and its two values to the hash index - try { - this.#valueIndex.addValue(key, value) - } catch { - // This is a different value, need to move the key to the hash index - const existingValue = this.#valueIndex.get(key)! - this.#valueIndex.delete(key) - this.#hashIndex.addValue(key, existingValue) - this.#hashIndex.addValue(key, value) + // At this point there is a prefix map for this key, we need the value map or + // single value for this prefix + const valueMapOrSingleValue = prefixMap.get(prefix) + + const [valueMap, currentSingleValueForPrefix] = isSingleValue( + valueMapOrSingleValue + ) + ? [undefined, valueMapOrSingleValue] + : [valueMapOrSingleValue, undefined] + + if (currentSingleValueForPrefix) { + const [currentValue, currentMultiplicity] = currentSingleValueForPrefix + const [currentPrefix, currentSuffix] = getPrefix( + currentValue + ) + if (currentPrefix !== prefix) { + throw new Error(`Mismatching prefixes, this should never happen`) + } + if (currentSuffix === suffix || hash(currentSuffix) === hash(suffix)) { + // They are the same value, so we just update the multiplicity + const newMultiplicity = currentMultiplicity + multiplicity + if (newMultiplicity === 0) { + prefixMap.delete(prefix) + } else { + prefixMap.set(prefix, [value, newMultiplicity]) + } + return + } else { + // They have different suffixes, so we need to add a value map for this suffix + // to the prefix map + const valueMap = new Map() + valueMap.set(hash(currentSuffix), currentSingleValueForPrefix) + valueMap.set(hash(suffix), valueTuple) + prefixMap.set(prefix, valueMap) + return } + } + + // At this point there was no single value for the prefix, there *may* be + // a value map for this prefix. If there is not, we can just add the new value + // as a single value to the prefix map + if (!valueMap) { + prefixMap.set(prefix, valueTuple) return } - if (containedInHashIndex) { - // This key is already in the hash index so it already has two or more values. - // However, this new value and multiplicity could cause an existing value to be removed - // and lead to the key having only a single value in which case we need to move it back to the value index - const singleRemainingValue = this.#hashIndex.addValue(key, value) - if (singleRemainingValue) { - // The key only has a single remaining value so we need to move it back to the value index - this.#hashIndex.delete(key) - this.#valueIndex.addValue(key, singleRemainingValue) + // We now know there is a value map for this prefix, we need see if there is a + // current value for the suffix. If there is, we update the multiplicity, otherwise + // we add the new value as a single value to the value map + const suffixHash = hash(suffix) + const currentValueForSuffix = valueMap.get(suffixHash) + if (currentValueForSuffix) { + const [, currentMultiplicity] = currentValueForSuffix + const newMultiplicity = currentMultiplicity + multiplicity + if (newMultiplicity === 0) { + valueMap.delete(suffixHash) + } else { + valueMap.set(suffixHash, [value, newMultiplicity]) } - return + } else { + valueMap.set(suffixHash, valueTuple) } } - append(other: Index): void { - for (const [key, value] of other.#entries()) { + append(other: Index): void { + for (const [key, value] of other.entries()) { this.addValue(key, value) } } - join(other: Index): MultiSet<[K, [V, V2]]> { - const result: Array<[[K, [V, V2]], number]> = [] - + join( + other: Index + ): MultiSet<[TKey, [TValue, TValue2]]> { + const result: Array<[[TKey, [TValue, TValue2]], number]> = [] // We want to iterate over the smaller of the two indexes to reduce the // number of operations we need to do. if (this.size <= other.size) { @@ -174,3 +273,26 @@ export class Index { return new MultiSet(result) } } + +function getPrefix( + value: TValue +): [TPrefix | NO_PREFIX, TValue] { + // If the value is an array of two elements and the first element is a string + // or number, then the first element is the prefix. This is used to distinguish + // between values without the need for hashing unless there are multiple values + // for the same prefix. + if ( + Array.isArray(value) && + value.length === 2 && + (typeof value[0] === `string` || typeof value[0] === `number`) + ) { + return [value[0] as TPrefix, value[1] as TValue] + } + return [NO_PREFIX, value] +} + +function isSingleValue( + value: SingleValue | unknown +): value is SingleValue { + return Array.isArray(value) +} diff --git a/packages/db-ivm/src/valueIndex.ts b/packages/db-ivm/src/valueIndex.ts deleted file mode 100644 index 2470e7aa8..000000000 --- a/packages/db-ivm/src/valueIndex.ts +++ /dev/null @@ -1,78 +0,0 @@ -import { hash } from "./hashing/index.js" - -/** - * A map from a difference collection trace's keys -> (value, multiplicities) that changed. - * Used in operations like join and reduce where the operation needs to - * exploit the key-value structure of the data to run efficiently. - */ -export class ValueIndex { - #inner: Map // Maps key to the value and its multiplicity - - constructor() { - this.#inner = new Map() - } - - toString(indent = false): string { - return `ValueIndex(${JSON.stringify( - [...this.#inner.entries()], - undefined, - indent ? 2 : undefined - )})` - } - - get(key: K): [V, number] | undefined { - return this.#inner.get(key) - } - - getMultiplicity(key: K): number { - return this.get(key)?.[1] ?? 0 - } - - entries() { - return this.#inner.entries() - } - - has(key: K): boolean { - return this.#inner.has(key) - } - - delete(key: K): void { - this.#inner.delete(key) - } - - get size(): number { - return this.#inner.size - } - - addValue(key: K, v: [V, number]): void { - const [value, multiplicity] = v - - if (multiplicity === 0) { - return - } - - if (this.has(key)) { - const [currValue, currMultiplicity] = this.get(key)! - if (hash(value) === hash(currValue)) { - // Update the multiplicity - this.#setMultiplicity(key, value, currMultiplicity + multiplicity) - return - } - // Different value, not allowed. - // ValueIndex only supports one value per key. - throw new Error( - `Cannot add value for key ${key} because it already exists in ValueIndex with a different value` - ) - } - - this.#inner.set(key, [value, multiplicity]) - } - - #setMultiplicity(key: K, value: V, multiplicity: number): void { - if (multiplicity === 0) { - this.#inner.delete(key) - } else { - this.#inner.set(key, [value, multiplicity]) - } - } -} From a6b2ea9932014fc914a3fd721af4ece8f1bfa913 Mon Sep 17 00:00:00 2001 From: Sam Willis Date: Sat, 13 Sep 2025 19:24:42 +0100 Subject: [PATCH 2/5] comments --- packages/db-ivm/src/indexes.ts | 79 ++++++++++++++++++++++++++++++++-- 1 file changed, 75 insertions(+), 4 deletions(-) diff --git a/packages/db-ivm/src/indexes.ts b/packages/db-ivm/src/indexes.ts index 16be2a57b..f3e11541a 100644 --- a/packages/db-ivm/src/indexes.ts +++ b/packages/db-ivm/src/indexes.ts @@ -1,28 +1,57 @@ import { MultiSet } from "./multiset.js" import { hash } from "./hashing/index.js" +import type { Hash } from "./hashing/index.js" +// We use a symbol to represent the absence of a prefix, unprefixed values a stored +// against this key. const NO_PREFIX = Symbol(`NO_PREFIX`) type NO_PREFIX = typeof NO_PREFIX -type Hash = number +// A single value is a tuple of the value and the multiplicity. type SingleValue = [TValue, number] + +// Base map type for the index. Stores single values or prefix maps against a key. type IndexMap = Map< TKey, SingleValue | PrefixMap > + +// Second level map type for the index, stores single values or value maps against a prefix. type PrefixMap = Map< TPrefix | NO_PREFIX, SingleValue | ValueMap > + +// Third level map type for the index, stores single values or value maps against a hash. type ValueMap = Map +/** + * A map from a difference collection trace's keys -> (value, multiplicities) that changed. + * Used in operations like join and reduce where the operation needs to + * exploit the key-value structure of the data to run efficiently. + */ export class Index { + /* + * This index maintains a nested map of keys -> (value, multiplicities), where: + * - initially the values are stored against the key as a single value tuple + * - when a key gets additional values, the values are stored against the key in a + * prefix map + * - the prefix is extract where possible from values that are structured as + * [rowPrimaryKey, rowValue], as they are in the Tanstack DB query pipeline. + * - only when there are multiple values for a given prefix do we fall back to a + * hash to identify identical values, storing them in a third level value map. + */ #inner: IndexMap constructor() { this.#inner = new Map() } + /** + * This method returns a string representation of the index. + * @param indent - Whether to indent the string representation. + * @returns A string representation of the index. + */ toString(indent = false): string { return `Index(${JSON.stringify( [...this.entries()], @@ -31,18 +60,36 @@ export class Index { )})` } + /** + * The size of the index. + */ get size(): number { return this.#inner.size } + /** + * This method checks if the index has a given key. + * @param key - The key to check. + * @returns True if the index has the key, false otherwise. + */ has(key: TKey): boolean { return this.#inner.has(key) } + /** + * This method returns all values for a given key. + * @param key - The key to get the values for. + * @returns An array of value tuples [value, multiplicity]. + */ get(key: TKey): Array<[TValue, number]> { return [...this.getIterator(key)] } + /** + * This method returns an iterator over all values for a given key. + * @param key - The key to get the values for. + * @returns An iterator of value tuples [value, multiplicity]. + */ *getIterator(key: TKey): Iterable<[TValue, number]> { const prefixMapOrSingleValue = this.#inner.get(key) if (isSingleValue(prefixMapOrSingleValue)) { @@ -80,12 +127,17 @@ export class Index { * It returns an iterator that you can use if you need to iterate over the values for a given key. * @returns An iterator of all *keys* in the index and their corresponding value iterator. */ - *#entriesIterators(): Iterable<[TKey, Iterable<[TValue, number]>]> { + *entriesIterators(): Iterable<[TKey, Iterable<[TValue, number]>]> { for (const key of this.#inner.keys()) { yield [key, this.getIterator(key)] } } + /** + * This method adds a value to the index. + * @param key - The key to add the value to. + * @param valueTuple - The value tuple [value, multiplicity] to add to the index. + */ addValue(key: TKey, valueTuple: SingleValue) { const [value, multiplicity] = valueTuple // If the multiplicity is 0, do nothing @@ -232,12 +284,21 @@ export class Index { } } + /** + * This method appends another index to the current index. + * @param other - The index to append to the current index. + */ append(other: Index): void { for (const [key, value] of other.entries()) { this.addValue(key, value) } } + /** + * This method joins two indexes. + * @param other - The index to join with the current index. + * @returns A multiset of the joined values. + */ join( other: Index ): MultiSet<[TKey, [TValue, TValue2]]> { @@ -245,7 +306,7 @@ export class Index { // We want to iterate over the smaller of the two indexes to reduce the // number of operations we need to do. if (this.size <= other.size) { - for (const [key, valueIt] of this.#entriesIterators()) { + for (const [key, valueIt] of this.entriesIterators()) { if (!other.has(key)) continue const otherValues = other.get(key) for (const [val1, mul1] of valueIt) { @@ -257,7 +318,7 @@ export class Index { } } } else { - for (const [key, otherValueIt] of other.#entriesIterators()) { + for (const [key, otherValueIt] of other.entriesIterators()) { if (!this.has(key)) continue const values = this.get(key) for (const [val2, mul2] of otherValueIt) { @@ -274,6 +335,11 @@ export class Index { } } +/** + * This function extracts the prefix from a value. + * @param value - The value to extract the prefix from. + * @returns The prefix and the suffix. + */ function getPrefix( value: TValue ): [TPrefix | NO_PREFIX, TValue] { @@ -291,6 +357,11 @@ function getPrefix( return [NO_PREFIX, value] } +/** + * This function checks if a value is a single value. + * @param value - The value to check. + * @returns True if the value is a single value, false otherwise. + */ function isSingleValue( value: SingleValue | unknown ): value is SingleValue { From d583ced5f0672e6de6f8d6744e544aa5f2f1ad24 Mon Sep 17 00:00:00 2001 From: Sam Willis Date: Sat, 13 Sep 2025 19:27:31 +0100 Subject: [PATCH 3/5] changeset --- .changeset/odd-mangos-pick.md | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 .changeset/odd-mangos-pick.md diff --git a/.changeset/odd-mangos-pick.md b/.changeset/odd-mangos-pick.md new file mode 100644 index 000000000..f9f839817 --- /dev/null +++ b/.changeset/odd-mangos-pick.md @@ -0,0 +1,5 @@ +--- +"@tanstack/db-ivm": patch +--- + +Change the ivm indexes to use a three level `key->prefix->hash->value` structure, only falling back to structural hashing when there are multiple values for a single prefix. This removes all hashing during the initial run of a query delivering a 2-3x speedup. From 0b2785cfe5b2ea38f8345f5e847fb3ac5b7fafc1 Mon Sep 17 00:00:00 2001 From: Sam Willis Date: Thu, 18 Sep 2025 12:57:55 +0100 Subject: [PATCH 4/5] Allow ValueMap for a key without a PrefixMap --- packages/db-ivm/src/indexes.ts | 124 ++++++++++++++++++++++----------- 1 file changed, 85 insertions(+), 39 deletions(-) diff --git a/packages/db-ivm/src/indexes.ts b/packages/db-ivm/src/indexes.ts index f3e11541a..5415186a4 100644 --- a/packages/db-ivm/src/indexes.ts +++ b/packages/db-ivm/src/indexes.ts @@ -10,20 +10,20 @@ type NO_PREFIX = typeof NO_PREFIX // A single value is a tuple of the value and the multiplicity. type SingleValue = [TValue, number] -// Base map type for the index. Stores single values or prefix maps against a key. +// Base map type for the index. Stores single values, prefix maps, or value maps against a key. type IndexMap = Map< TKey, - SingleValue | PrefixMap + SingleValue | PrefixMap | ValueMap > // Second level map type for the index, stores single values or value maps against a prefix. -type PrefixMap = Map< +class PrefixMap extends Map< TPrefix | NO_PREFIX, SingleValue | ValueMap -> +> {} // Third level map type for the index, stores single values or value maps against a hash. -type ValueMap = Map +class ValueMap extends Map {} /** * A map from a difference collection trace's keys -> (value, multiplicities) that changed. @@ -91,13 +91,19 @@ export class Index { * @returns An iterator of value tuples [value, multiplicity]. */ *getIterator(key: TKey): Iterable<[TValue, number]> { - const prefixMapOrSingleValue = this.#inner.get(key) - if (isSingleValue(prefixMapOrSingleValue)) { - yield prefixMapOrSingleValue - } else if (prefixMapOrSingleValue === undefined) { + const mapOrSingleValue = this.#inner.get(key) + if (isSingleValue(mapOrSingleValue)) { + yield mapOrSingleValue + } else if (mapOrSingleValue === undefined) { return + } else if (mapOrSingleValue instanceof ValueMap) { + // Direct ValueMap - all values have NO_PREFIX + for (const valueTuple of mapOrSingleValue.values()) { + yield valueTuple + } } else { - for (const singleValueOrValueMap of prefixMapOrSingleValue.values()) { + // PrefixMap - iterate through all prefixes + for (const singleValueOrValueMap of mapOrSingleValue.values()) { if (isSingleValue(singleValueOrValueMap)) { yield singleValueOrValueMap } else { @@ -143,23 +149,17 @@ export class Index { // If the multiplicity is 0, do nothing if (multiplicity === 0) return - const prefixMapOrSingleValue = this.#inner.get(key) + const mapOrSingleValue = this.#inner.get(key) - if (prefixMapOrSingleValue === undefined) { + if (mapOrSingleValue === undefined) { // This is the first time we see a value for this key we just insert it // into the index as a single value tuple this.#inner.set(key, valueTuple) return } - const [currentSingleValueForKey, prefixMap] = isSingleValue( - prefixMapOrSingleValue - ) - ? [prefixMapOrSingleValue, undefined] - : [undefined, prefixMapOrSingleValue] - - if (currentSingleValueForKey) { - const [currentValue, currentMultiplicity] = currentSingleValueForKey + if (isSingleValue(mapOrSingleValue)) { + const [currentValue, currentMultiplicity] = mapOrSingleValue // We have a single value for this key, lets check if this is the same value // and if so we just update the multiplicity. This is a check if its the same // literal value or object reference. @@ -172,16 +172,13 @@ export class Index { } return } - } - - // Get the prefix of the new value - const [prefix, suffix] = getPrefix(value) - if (currentSingleValueForKey) { - const [currentValue, currentMultiplicity] = currentSingleValueForKey + // Get the prefix of both values + const [prefix, suffix] = getPrefix(value) const [currentPrefix, currentSuffix] = getPrefix( currentValue ) + if ( currentPrefix === prefix && (currentSuffix === suffix || hash(currentSuffix) === hash(suffix)) @@ -194,34 +191,74 @@ export class Index { this.#inner.set(key, [value, newMultiplicity]) } return + } + + // They are different values - decide between ValueMap or PrefixMap + if (currentPrefix === NO_PREFIX && prefix === NO_PREFIX) { + // Both values have NO_PREFIX, use ValueMap directly + const valueMap = new ValueMap() + valueMap.set(hash(currentSuffix), mapOrSingleValue) + valueMap.set(hash(suffix), valueTuple) + this.#inner.set(key, valueMap) + return } else { - // They are different values, so we need to move the current value to a - // new prefix map - const newPrefixMap = new Map< - TPrefix | NO_PREFIX, - SingleValue | ValueMap - >() + // At least one has a prefix, use PrefixMap + const newPrefixMap = new PrefixMap() this.#inner.set(key, newPrefixMap) if (currentPrefix === prefix) { // They have the same prefix but different suffixes, so we need to add a // value map for this suffix to the prefix map - const valueMap = new Map() - valueMap.set(hash(currentSuffix), currentSingleValueForKey) + const valueMap = new ValueMap() + valueMap.set(hash(currentSuffix), mapOrSingleValue) valueMap.set(hash(suffix), valueTuple) newPrefixMap.set(currentPrefix, valueMap) } else { - // They have different prefixes, so we can add then as singe values to the + // They have different prefixes, so we can add then as single values to the // prefix map - newPrefixMap.set(currentPrefix, currentSingleValueForKey) + newPrefixMap.set(currentPrefix, mapOrSingleValue) newPrefixMap.set(prefix, valueTuple) } return } } - // At this point there is a prefix map for this key, we need the value map or - // single value for this prefix + // At this point we have either a ValueMap or PrefixMap + const [prefix, suffix] = getPrefix(value) + + if (mapOrSingleValue instanceof ValueMap) { + // Direct ValueMap - all values have NO_PREFIX + if (prefix !== NO_PREFIX) { + // This value has a prefix but existing values don't - need to convert to PrefixMap + const newPrefixMap = new PrefixMap() + newPrefixMap.set(NO_PREFIX, mapOrSingleValue) + newPrefixMap.set(prefix, valueTuple) + this.#inner.set(key, newPrefixMap) + return + } + + // Both existing and new values have NO_PREFIX, add to ValueMap + const suffixHash = hash(suffix) + const currentValueForSuffix = mapOrSingleValue.get(suffixHash) + if (currentValueForSuffix) { + const [, currentMultiplicity] = currentValueForSuffix + const newMultiplicity = currentMultiplicity + multiplicity + if (newMultiplicity === 0) { + mapOrSingleValue.delete(suffixHash) + if (mapOrSingleValue.size === 0) { + this.#inner.delete(key) + } + } else { + mapOrSingleValue.set(suffixHash, [value, newMultiplicity]) + } + } else { + mapOrSingleValue.set(suffixHash, valueTuple) + } + return + } + + // PrefixMap case + const prefixMap = mapOrSingleValue const valueMapOrSingleValue = prefixMap.get(prefix) const [valueMap, currentSingleValueForPrefix] = isSingleValue( @@ -243,6 +280,9 @@ export class Index { const newMultiplicity = currentMultiplicity + multiplicity if (newMultiplicity === 0) { prefixMap.delete(prefix) + if (prefixMap.size === 0) { + this.#inner.delete(key) + } } else { prefixMap.set(prefix, [value, newMultiplicity]) } @@ -250,7 +290,7 @@ export class Index { } else { // They have different suffixes, so we need to add a value map for this suffix // to the prefix map - const valueMap = new Map() + const valueMap = new ValueMap() valueMap.set(hash(currentSuffix), currentSingleValueForPrefix) valueMap.set(hash(suffix), valueTuple) prefixMap.set(prefix, valueMap) @@ -276,6 +316,12 @@ export class Index { const newMultiplicity = currentMultiplicity + multiplicity if (newMultiplicity === 0) { valueMap.delete(suffixHash) + if (valueMap.size === 0) { + prefixMap.delete(prefix) + if (prefixMap.size === 0) { + this.#inner.delete(key) + } + } } else { valueMap.set(suffixHash, [value, newMultiplicity]) } From c0674b13ba42a8e7911257423a1f77e53611e0a0 Mon Sep 17 00:00:00 2001 From: Sam Willis Date: Thu, 18 Sep 2025 14:39:13 +0100 Subject: [PATCH 5/5] refactor --- packages/db-ivm/src/indexes.ts | 358 ++++++++++++++++++--------------- 1 file changed, 197 insertions(+), 161 deletions(-) diff --git a/packages/db-ivm/src/indexes.ts b/packages/db-ivm/src/indexes.ts index 5415186a4..3c52614eb 100644 --- a/packages/db-ivm/src/indexes.ts +++ b/packages/db-ivm/src/indexes.ts @@ -1,3 +1,38 @@ +/** + * # Optimized Index Data Structure + * + * Multi-level index that adapts storage strategy based on data patterns to minimize memory + * usage, eliminate wasteful lookups, and avoid hashing whenever possible. + * + * ## Storage Strategy + * + * **Single value**: `IndexMap['key'] → [value, multiplicity]` (no hashing needed) + * + * **Multiple unprefixed values**: Direct ValueMap (avoids NO_PREFIX lookup) + * ``` + * IndexMap['key'] → ValueMap { hash(value1) → [value1, mult1], ... } + * ``` + * + * **Values with prefixes**: PrefixMap uses prefix keys directly (no hashing) + * ``` + * IndexMap['key'] → PrefixMap { 'prefix1' → [value1, mult1], NO_PREFIX → ValueMap{...} } + * ``` + * + * **Multiple values per prefix**: ValueMap within PrefixMap (hash only suffixes) + * ``` + * PrefixMap['prefix'] → ValueMap { hash(suffix1) → [full_value1, mult1], ... } + * ``` + * + * ## Dynamic Evolution + * + * Structure automatically evolves as data is added: + * - Single → ValueMap (when both values unprefixed) + * - Single → PrefixMap (when at least one prefixed) + * - ValueMap → PrefixMap (adding prefixed value to unprefixed) + * + * Prefixes extracted from array values: `['prefix', 'suffix']` → prefix='prefix' + */ + import { MultiSet } from "./multiset.js" import { hash } from "./hashing/index.js" import type { Hash } from "./hashing/index.js" @@ -20,10 +55,83 @@ type IndexMap = Map< class PrefixMap extends Map< TPrefix | NO_PREFIX, SingleValue | ValueMap -> {} +> { + /** + * Add a value to the PrefixMap. Returns true if the map becomes empty after the operation. + */ + addValue(value: TValue, multiplicity: number): boolean { + if (multiplicity === 0) return this.size === 0 + + const prefix = getPrefix(value) + const valueMapOrSingleValue = this.get(prefix) + + if (isSingleValue(valueMapOrSingleValue)) { + const [currentValue, currentMultiplicity] = valueMapOrSingleValue + const currentPrefix = getPrefix(currentValue) + + if (currentPrefix !== prefix) { + throw new Error(`Mismatching prefixes, this should never happen`) + } + + if (currentValue === value || hash(currentValue) === hash(value)) { + // Same value, update multiplicity + const newMultiplicity = currentMultiplicity + multiplicity + if (newMultiplicity === 0) { + this.delete(prefix) + } else { + this.set(prefix, [value, newMultiplicity]) + } + } else { + // Different suffixes, need to create ValueMap + const valueMap = new ValueMap() + valueMap.set(hash(currentValue), valueMapOrSingleValue) + valueMap.set(hash(value), [value, multiplicity]) + this.set(prefix, valueMap) + } + } else if (valueMapOrSingleValue === undefined) { + // No existing value for this prefix + this.set(prefix, [value, multiplicity]) + } else { + // Existing ValueMap + const isEmpty = valueMapOrSingleValue.addValue(value, multiplicity) + if (isEmpty) { + this.delete(prefix) + } + } + + return this.size === 0 + } +} // Third level map type for the index, stores single values or value maps against a hash. -class ValueMap extends Map {} +class ValueMap extends Map { + /** + * Add a value to the ValueMap. Returns true if the map becomes empty after the operation. + * @param value - The full value to store + * @param multiplicity - The multiplicity to add + * @param hashKey - Optional hash key to use instead of hashing the full value (used when in PrefixMap context) + */ + addValue(value: TValue, multiplicity: number): boolean { + if (multiplicity === 0) return this.size === 0 + + const key = hash(value) + const currentValue = this.get(key) + + if (currentValue) { + const [, currentMultiplicity] = currentValue + const newMultiplicity = currentMultiplicity + multiplicity + if (newMultiplicity === 0) { + this.delete(key) + } else { + this.set(key, [value, newMultiplicity]) + } + } else { + this.set(key, [value, multiplicity]) + } + + return this.size === 0 + } +} /** * A map from a difference collection trace's keys -> (value, multiplicities) that changed. @@ -152,181 +260,111 @@ export class Index { const mapOrSingleValue = this.#inner.get(key) if (mapOrSingleValue === undefined) { - // This is the first time we see a value for this key we just insert it - // into the index as a single value tuple + // First value for this key this.#inner.set(key, valueTuple) return } if (isSingleValue(mapOrSingleValue)) { - const [currentValue, currentMultiplicity] = mapOrSingleValue - // We have a single value for this key, lets check if this is the same value - // and if so we just update the multiplicity. This is a check if its the same - // literal value or object reference. - if (currentValue === value) { - const newMultiplicity = currentMultiplicity + multiplicity - if (newMultiplicity === 0) { - this.#inner.delete(key) - } else { - this.#inner.set(key, [value, newMultiplicity]) - } - return - } - - // Get the prefix of both values - const [prefix, suffix] = getPrefix(value) - const [currentPrefix, currentSuffix] = getPrefix( - currentValue + // Handle transition from single value to map + this.#handleSingleValueTransition( + key, + mapOrSingleValue, + value, + multiplicity ) + return + } - if ( - currentPrefix === prefix && - (currentSuffix === suffix || hash(currentSuffix) === hash(suffix)) - ) { - // They are the same value, so we just update the multiplicity - const newMultiplicity = currentMultiplicity + multiplicity - if (newMultiplicity === 0) { + if (mapOrSingleValue instanceof ValueMap) { + // Handle existing ValueMap + const prefix = getPrefix(value) + if (prefix !== NO_PREFIX) { + // Convert ValueMap to PrefixMap since we have a prefixed value + const prefixMap = new PrefixMap() + prefixMap.set(NO_PREFIX, mapOrSingleValue) + prefixMap.set(prefix, valueTuple) + this.#inner.set(key, prefixMap) + } else { + // Add to existing ValueMap + const isEmpty = mapOrSingleValue.addValue(value, multiplicity) + if (isEmpty) { this.#inner.delete(key) - } else { - this.#inner.set(key, [value, newMultiplicity]) } - return } - - // They are different values - decide between ValueMap or PrefixMap - if (currentPrefix === NO_PREFIX && prefix === NO_PREFIX) { - // Both values have NO_PREFIX, use ValueMap directly - const valueMap = new ValueMap() - valueMap.set(hash(currentSuffix), mapOrSingleValue) - valueMap.set(hash(suffix), valueTuple) - this.#inner.set(key, valueMap) - return - } else { - // At least one has a prefix, use PrefixMap - const newPrefixMap = new PrefixMap() - this.#inner.set(key, newPrefixMap) - - if (currentPrefix === prefix) { - // They have the same prefix but different suffixes, so we need to add a - // value map for this suffix to the prefix map - const valueMap = new ValueMap() - valueMap.set(hash(currentSuffix), mapOrSingleValue) - valueMap.set(hash(suffix), valueTuple) - newPrefixMap.set(currentPrefix, valueMap) - } else { - // They have different prefixes, so we can add then as single values to the - // prefix map - newPrefixMap.set(currentPrefix, mapOrSingleValue) - newPrefixMap.set(prefix, valueTuple) - } - return + } else { + // Handle existing PrefixMap + const isEmpty = mapOrSingleValue.addValue(value, multiplicity) + if (isEmpty) { + this.#inner.delete(key) } } + } - // At this point we have either a ValueMap or PrefixMap - const [prefix, suffix] = getPrefix(value) - - if (mapOrSingleValue instanceof ValueMap) { - // Direct ValueMap - all values have NO_PREFIX - if (prefix !== NO_PREFIX) { - // This value has a prefix but existing values don't - need to convert to PrefixMap - const newPrefixMap = new PrefixMap() - newPrefixMap.set(NO_PREFIX, mapOrSingleValue) - newPrefixMap.set(prefix, valueTuple) - this.#inner.set(key, newPrefixMap) - return - } + /** + * Handle the transition from a single value to either a ValueMap or PrefixMap + */ + #handleSingleValueTransition( + key: TKey, + currentSingleValue: SingleValue, + newValue: TValue, + multiplicity: number + ) { + const [currentValue, currentMultiplicity] = currentSingleValue - // Both existing and new values have NO_PREFIX, add to ValueMap - const suffixHash = hash(suffix) - const currentValueForSuffix = mapOrSingleValue.get(suffixHash) - if (currentValueForSuffix) { - const [, currentMultiplicity] = currentValueForSuffix - const newMultiplicity = currentMultiplicity + multiplicity - if (newMultiplicity === 0) { - mapOrSingleValue.delete(suffixHash) - if (mapOrSingleValue.size === 0) { - this.#inner.delete(key) - } - } else { - mapOrSingleValue.set(suffixHash, [value, newMultiplicity]) - } + // Check for exact same value (reference equality) + if (currentValue === newValue) { + const newMultiplicity = currentMultiplicity + multiplicity + if (newMultiplicity === 0) { + this.#inner.delete(key) } else { - mapOrSingleValue.set(suffixHash, valueTuple) + this.#inner.set(key, [newValue, newMultiplicity]) } return } - // PrefixMap case - const prefixMap = mapOrSingleValue - const valueMapOrSingleValue = prefixMap.get(prefix) + // Get prefixes for both values + const newPrefix = getPrefix(newValue) + const currentPrefix = getPrefix(currentValue) - const [valueMap, currentSingleValueForPrefix] = isSingleValue( - valueMapOrSingleValue - ) - ? [undefined, valueMapOrSingleValue] - : [valueMapOrSingleValue, undefined] - - if (currentSingleValueForPrefix) { - const [currentValue, currentMultiplicity] = currentSingleValueForPrefix - const [currentPrefix, currentSuffix] = getPrefix( - currentValue - ) - if (currentPrefix !== prefix) { - throw new Error(`Mismatching prefixes, this should never happen`) - } - if (currentSuffix === suffix || hash(currentSuffix) === hash(suffix)) { - // They are the same value, so we just update the multiplicity - const newMultiplicity = currentMultiplicity + multiplicity - if (newMultiplicity === 0) { - prefixMap.delete(prefix) - if (prefixMap.size === 0) { - this.#inner.delete(key) - } - } else { - prefixMap.set(prefix, [value, newMultiplicity]) - } - return + // Check if they're the same value by prefix/suffix comparison + if ( + currentPrefix === newPrefix && + (currentValue === newValue || hash(currentValue) === hash(newValue)) + ) { + const newMultiplicity = currentMultiplicity + multiplicity + if (newMultiplicity === 0) { + this.#inner.delete(key) } else { - // They have different suffixes, so we need to add a value map for this suffix - // to the prefix map - const valueMap = new ValueMap() - valueMap.set(hash(currentSuffix), currentSingleValueForPrefix) - valueMap.set(hash(suffix), valueTuple) - prefixMap.set(prefix, valueMap) - return + this.#inner.set(key, [newValue, newMultiplicity]) } - } - - // At this point there was no single value for the prefix, there *may* be - // a value map for this prefix. If there is not, we can just add the new value - // as a single value to the prefix map - if (!valueMap) { - prefixMap.set(prefix, valueTuple) return } - // We now know there is a value map for this prefix, we need see if there is a - // current value for the suffix. If there is, we update the multiplicity, otherwise - // we add the new value as a single value to the value map - const suffixHash = hash(suffix) - const currentValueForSuffix = valueMap.get(suffixHash) - if (currentValueForSuffix) { - const [, currentMultiplicity] = currentValueForSuffix - const newMultiplicity = currentMultiplicity + multiplicity - if (newMultiplicity === 0) { - valueMap.delete(suffixHash) - if (valueMap.size === 0) { - prefixMap.delete(prefix) - if (prefixMap.size === 0) { - this.#inner.delete(key) - } - } + // Different values - choose appropriate map type + if (currentPrefix === NO_PREFIX && newPrefix === NO_PREFIX) { + // Both have NO_PREFIX, use ValueMap directly + const valueMap = new ValueMap() + valueMap.set(hash(currentValue), currentSingleValue) + valueMap.set(hash(newValue), [newValue, multiplicity]) + this.#inner.set(key, valueMap) + } else { + // At least one has a prefix, use PrefixMap + const prefixMap = new PrefixMap() + + if (currentPrefix === newPrefix) { + // Same prefix, different suffixes - need ValueMap within PrefixMap + const valueMap = new ValueMap() + valueMap.set(hash(currentValue), currentSingleValue) + valueMap.set(hash(newValue), [newValue, multiplicity]) + prefixMap.set(currentPrefix, valueMap) } else { - valueMap.set(suffixHash, [value, newMultiplicity]) + // Different prefixes - store as separate single values + prefixMap.set(currentPrefix, currentSingleValue) + prefixMap.set(newPrefix, [newValue, multiplicity]) } - } else { - valueMap.set(suffixHash, valueTuple) + + this.#inner.set(key, prefixMap) } } @@ -386,21 +424,19 @@ export class Index { * @param value - The value to extract the prefix from. * @returns The prefix and the suffix. */ -function getPrefix( - value: TValue -): [TPrefix | NO_PREFIX, TValue] { - // If the value is an array of two elements and the first element is a string - // or number, then the first element is the prefix. This is used to distinguish - // between values without the need for hashing unless there are multiple values - // for the same prefix. +function getPrefix(value: TValue): TPrefix | NO_PREFIX { + // If the value is an array and the first element is a string or number, then the + // first element is the prefix. This is used to distinguish between values without + // the need for hashing unless there are multiple values for the same prefix. if ( Array.isArray(value) && - value.length === 2 && - (typeof value[0] === `string` || typeof value[0] === `number`) + (typeof value[0] === `string` || + typeof value[0] === `number` || + typeof value[0] === `bigint`) ) { - return [value[0] as TPrefix, value[1] as TValue] + return value[0] as TPrefix } - return [NO_PREFIX, value] + return NO_PREFIX } /**