-
Notifications
You must be signed in to change notification settings - Fork 183
Optimize joins to use index when possible #335
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
6e3dccf
69ff884
9b62327
98ba8b8
2a2fa9a
3ac1208
02e04a3
69bb8ba
7140573
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,6 @@ | ||
| --- | ||
| "@tanstack/db-ivm": patch | ||
| "@tanstack/db": patch | ||
| --- | ||
|
|
||
| Optimize joins to use index on the join key when available. |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,53 @@ | ||
| import { DifferenceStreamWriter, LinearUnaryOperator } from "../graph.js" | ||
| import { StreamBuilder } from "../d2.js" | ||
| import type { IStreamBuilder, PipedOperator } from "../types.js" | ||
| import type { DifferenceStreamReader } from "../graph.js" | ||
| import type { MultiSet } from "../multiset.js" | ||
|
|
||
| /** | ||
| * Operator that applies a function to each element in the input stream | ||
| */ | ||
| export class TapOperator<T> extends LinearUnaryOperator<T, T> { | ||
| #f: (data: T) => void | ||
|
|
||
| constructor( | ||
| id: number, | ||
| inputA: DifferenceStreamReader<T>, | ||
| output: DifferenceStreamWriter<T>, | ||
| f: (data: T) => void | ||
| ) { | ||
| super(id, inputA, output) | ||
| this.#f = f | ||
| } | ||
|
|
||
| inner(collection: MultiSet<T>): MultiSet<T> { | ||
| return collection.map((data) => { | ||
| this.#f(data) | ||
| return data | ||
| }) | ||
| } | ||
| } | ||
|
|
||
| /** | ||
| * Invokes a function for each element in the input stream. | ||
| * This operator doesn't modify the stream and is used to perform side effects. | ||
| * @param f - The function to invoke on each element | ||
| * @returns The input stream | ||
| */ | ||
| export function tap<T>(f: (data: T) => void): PipedOperator<T, T> { | ||
| return (stream: IStreamBuilder<T>): IStreamBuilder<T> => { | ||
| const output = new StreamBuilder<T>( | ||
| stream.graph, | ||
| new DifferenceStreamWriter<T>() | ||
| ) | ||
| const operator = new TapOperator<T>( | ||
| stream.graph.getNextOperatorId(), | ||
| stream.connectReader(), | ||
| output.writer, | ||
| f | ||
| ) | ||
| stream.graph.addOperator(operator) | ||
| stream.graph.addStream(output.connectReader()) | ||
| return output | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -6,6 +6,55 @@ export interface AutoIndexConfig { | |
| autoIndex?: `off` | `eager` | ||
| } | ||
|
|
||
| function shouldAutoIndex(collection: CollectionImpl<any, any, any, any, any>) { | ||
| // Only proceed if auto-indexing is enabled | ||
| if (collection.config.autoIndex !== `eager`) { | ||
| return false | ||
| } | ||
|
|
||
| // Don't auto-index during sync operations | ||
| if ( | ||
| collection.status === `loading` || | ||
| collection.status === `initialCommit` | ||
| ) { | ||
| return false | ||
| } | ||
|
|
||
| return true | ||
| } | ||
|
|
||
| export function ensureIndexForField< | ||
| T extends Record<string, any>, | ||
| TKey extends string | number, | ||
| >( | ||
| fieldName: string, | ||
| fieldPath: Array<string>, | ||
| collection: CollectionImpl<T, TKey, any, any, any> | ||
| ) { | ||
| if (!shouldAutoIndex(collection)) { | ||
| return | ||
| } | ||
|
|
||
| // Check if we already have an index for this field | ||
| const existingIndex = Array.from(collection.indexes.values()).find((index) => | ||
| index.matchesField(fieldPath) | ||
| ) | ||
|
|
||
| if (existingIndex) { | ||
| return // Index already exists | ||
| } | ||
|
|
||
| // Create a new index for this field using the collection's createIndex method | ||
| try { | ||
| collection.createIndex((row) => (row as any)[fieldName], { | ||
| name: `auto_${fieldName}`, | ||
| indexType: BTreeIndex, | ||
| }) | ||
| } catch (error) { | ||
| console.warn(`Failed to create auto-index for field "${fieldName}":`, error) | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. we've used the
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I didn't actually change this. I took this piece of code from |
||
| } | ||
| } | ||
|
|
||
| /** | ||
| * Analyzes a where expression and creates indexes for all simple operations on single fields | ||
| */ | ||
|
|
@@ -16,44 +65,15 @@ export function ensureIndexForExpression< | |
| expression: BasicExpression, | ||
| collection: CollectionImpl<T, TKey, any, any, any> | ||
| ): void { | ||
| // Only proceed if auto-indexing is enabled | ||
| if (collection.config.autoIndex !== `eager`) { | ||
| return | ||
| } | ||
|
|
||
| // Don't auto-index during sync operations | ||
| if ( | ||
| collection.status === `loading` || | ||
| collection.status === `initialCommit` | ||
| ) { | ||
| if (!shouldAutoIndex(collection)) { | ||
| return | ||
| } | ||
|
|
||
| // Extract all indexable expressions and create indexes for them | ||
| const indexableExpressions = extractIndexableExpressions(expression) | ||
|
|
||
| for (const { fieldName, fieldPath } of indexableExpressions) { | ||
| // Check if we already have an index for this field | ||
| const existingIndex = Array.from(collection.indexes.values()).find( | ||
| (index) => index.matchesField(fieldPath) | ||
| ) | ||
|
|
||
| if (existingIndex) { | ||
| continue // Index already exists | ||
| } | ||
|
|
||
| // Create a new index for this field using the collection's createIndex method | ||
| try { | ||
| collection.createIndex((row) => (row as any)[fieldName], { | ||
| name: `auto_${fieldName}`, | ||
| indexType: BTreeIndex, | ||
| }) | ||
| } catch (error) { | ||
| console.warn( | ||
| `Failed to create auto-index for field "${fieldName}":`, | ||
| error | ||
| ) | ||
| } | ||
| ensureIndexForField(fieldName, fieldPath, collection) | ||
| } | ||
| } | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It could be useful in future to pass the multiplicity to the callback so that it's aware if it's an insert/delete. Not important for now.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We can add this later when we need it :-)