Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 35 additions & 6 deletions rust/lance-encoding/src/encodings/logical/primitive.rs
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ struct ChunkMeta {
}

/// A mini-block chunk that has been decoded and decompressed
#[derive(Debug)]
#[derive(Debug, Clone)]
struct DecodedMiniBlockChunk {
rep: Option<ScalarBuffer<u16>>,
def: Option<ScalarBuffer<u16>>,
Expand Down Expand Up @@ -530,13 +530,42 @@ impl DecodePageTask for DecodeMiniBlockTask {

// We need to keep track of the offset into repbuf/defbuf that we are building up
let mut level_offset = 0;

// Pre-compute caching needs for each chunk by checking if the next chunk is the same
let needs_caching: Vec<bool> = self
.instructions
.windows(2)
.map(|w| w[0].1.chunk_idx == w[1].1.chunk_idx)
.chain(std::iter::once(false)) // the last one never needs caching
.collect();

// Cache for storing decoded chunks when beneficial
let mut chunk_cache: Option<(usize, DecodedMiniBlockChunk)> = None;

// Now we iterate through each instruction and process it
for (instructions, chunk) in self.instructions.iter() {
// TODO: It's very possible that we have duplicate `buf` in self.instructions and we
// don't want to decode the buf again and again on the same thread.
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This PR partially addresses this TODO. It improves performance unless chunks are accessed in a fully random pattern, which would require a HashMap-based cache at the cost of higher memory usage.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Chunks should always be accessed sequentially, I believe. At some point in the decoding process we have a requirement that offsets / ranges be in sorted order.

for (idx, (instructions, chunk)) in self.instructions.iter().enumerate() {
let should_cache_this_chunk = needs_caching[idx];

let decoded_chunk = match &chunk_cache {
Some((cached_chunk_idx, ref cached_chunk))
if *cached_chunk_idx == chunk.chunk_idx =>
{
// Clone only when we have a cache hit (much cheaper than decoding)
cached_chunk.clone()
}
_ => {
// Cache miss, need to decode
let decoded = self.decode_miniblock_chunk(&chunk.data, chunk.items_in_chunk)?;

// Only update cache if this chunk will benefit the next access
if should_cache_this_chunk {
chunk_cache = Some((chunk.chunk_idx, decoded.clone()));
}
decoded
}
};

let DecodedMiniBlockChunk { rep, def, values } =
self.decode_miniblock_chunk(&chunk.data, chunk.items_in_chunk)?;
let DecodedMiniBlockChunk { rep, def, values } = decoded_chunk;

// Our instructions tell us which rows we want to take from this chunk
let row_range_start =
Expand Down
Loading