Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 55 additions & 2 deletions rust/lance-core/src/utils/address.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,31 @@

use std::ops::Range;

/// A row address encodes a fragment ID (upper 32 bits) and row offset (lower 32 bits).
///
/// Both halves are packed into a single `u64`. The derived comparison and
/// hashing traits operate on that packed value, so addresses order by
/// fragment ID first and by row offset second.
///
/// ```
/// use lance_core::utils::address::RowAddress;
///
/// let addr = RowAddress::new_from_parts(5, 100);
/// assert_eq!(addr.fragment_id(), 5);
/// assert_eq!(addr.row_offset(), 100);
///
/// // Convert to/from u64
/// let raw: u64 = addr.into();
/// let addr2: RowAddress = raw.into();
/// assert_eq!(addr, addr2);
///
/// // Display format
/// assert_eq!(format!("{}", addr), "(5, 100)");
/// ```
// NOTE(review): `Debug` is not derived here; the unit test expects `{:?}` to
// render as "(fragment, offset)", so a manual impl presumably lives elsewhere
// in this file. Confirm before adding `Debug` to the derive list.
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct RowAddress(u64);

impl RowAddress {
/// Number of `u64` addresses spanned by a single fragment (2^32).
pub const FRAGMENT_SIZE: u64 = 1 << 32;
/// A fragment id that will never be used.
pub const TOMBSTONE_FRAG: u32 = 0xffffffff;
/// A row id that will never be used.
pub const TOMBSTONE_ROW: u64 = 0xffffffffffffffff;

pub fn new_from_u64(row_addr: u64) -> Self {
Expand All @@ -21,10 +38,20 @@ impl RowAddress {
Self(((fragment_id as u64) << 32) | row_offset as u64)
}

/// Builds the address of the row at offset zero within `fragment_id`.
pub fn first_row(fragment_id: u32) -> Self {
    // The first row of any fragment always sits at offset zero.
    let row_offset = 0;
    Self::new_from_parts(fragment_id, row_offset)
}

/// Returns the range of u64 addresses for a given fragment.
///
/// The range starts at the fragment's first row and ends (exclusively) at the
/// first row of the following fragment.
///
/// For `fragment_id == u32::MAX` there is no following fragment: the exclusive
/// end (2^64) does not fit in a `u64`, so the end saturates at `u64::MAX`
/// instead of overflowing `fragment_id + 1`.
///
/// ```
/// use lance_core::utils::address::RowAddress;
///
/// let range = RowAddress::address_range(2);
/// assert_eq!(range.start, 2 * RowAddress::FRAGMENT_SIZE);
/// assert_eq!(range.end, 3 * RowAddress::FRAGMENT_SIZE);
///
/// // The last possible fragment id does not overflow.
/// let last = RowAddress::address_range(u32::MAX);
/// assert_eq!(last.end, u64::MAX);
/// ```
pub fn address_range(fragment_id: u32) -> Range<u64> {
    let start = u64::from(Self::first_row(fragment_id));
    let end = match fragment_id.checked_add(1) {
        Some(next_fragment) => u64::from(Self::first_row(next_fragment)),
        // A plain `fragment_id + 1` would panic in debug builds and wrap to
        // fragment 0 in release builds, yielding an inverted (empty) range.
        None => u64::MAX,
    };
    start..end
}
Expand Down Expand Up @@ -61,3 +88,29 @@ impl std::fmt::Display for RowAddress {
write!(f, "({}, {})", self.fragment_id(), self.row_offset())
}
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_row_address() {
        // Decoding a raw u64 (constructor not exercised by the doctests):
        // fragment id in the high half, row offset in the low half.
        let decoded = RowAddress::new_from_u64(0x0000_0001_0000_0002);
        assert_eq!(decoded.fragment_id(), 1);
        assert_eq!(decoded.row_offset(), 2);

        // Debug rendering (the doctest only checks Display).
        assert_eq!(format!("{:?}", decoded), "(1, 2)");

        // address_range goes through first_row, so this covers both.
        let fragment_range = RowAddress::address_range(3);
        assert_eq!(fragment_range.start, 3 * RowAddress::FRAGMENT_SIZE);

        // u64 round trip with values different from the doctest.
        let original = RowAddress::new_from_parts(7, 8);
        let as_u64: u64 = original.into();
        let restored: RowAddress = as_u64.into();
        assert_eq!(original, restored);
    }
}
38 changes: 38 additions & 0 deletions rust/lance-core/src/utils/backoff.rs
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,44 @@ mod tests {
assert_eq!(backoff.attempt(), 4);
}

#[test]
fn test_backoff_with_base() {
    // With jitter disabled the delays follow base^attempt * 50:
    // 3^0 * 50, 3^1 * 50, 3^2 * 50.
    let mut backoff = Backoff::default().with_base(3).with_jitter(0);
    for expected_ms in [50, 150, 450] {
        assert_eq!(backoff.next_backoff().as_millis(), expected_ms);
    }
}

#[test]
fn test_backoff_with_unit() {
    // With jitter disabled the delays follow 2^attempt * unit:
    // 2^0 * 100, 2^1 * 100.
    let mut backoff = Backoff::default().with_unit(100).with_jitter(0);
    for expected_ms in [100, 200] {
        assert_eq!(backoff.next_backoff().as_millis(), expected_ms);
    }
}

#[test]
fn test_backoff_with_min() {
    let mut backoff = Backoff::default().with_min(100).with_jitter(0);
    // The first delay is expected to be raised to the configured minimum.
    let first_delay = backoff.next_backoff();
    assert_eq!(first_delay.as_millis(), 100);
}

#[test]
fn test_backoff_with_max() {
    let mut backoff = Backoff::default().with_max(75).with_jitter(0);
    // The first delay is below the cap; the second is clamped to the maximum.
    for expected_ms in [50, 75] {
        assert_eq!(backoff.next_backoff().as_millis(), expected_ms);
    }
}

#[test]
fn test_backoff_reset() {
    let mut backoff = Backoff::default().with_jitter(0);

    // Taking one backoff advances the attempt counter.
    let first_delay = backoff.next_backoff();
    assert_eq!(first_delay.as_millis(), 50);
    assert_eq!(backoff.attempt(), 1);

    // After reset() the counter is back at zero and the schedule restarts.
    backoff.reset();
    assert_eq!(backoff.attempt(), 0);
    let delay_after_reset = backoff.next_backoff();
    assert_eq!(delay_after_reset.as_millis(), 50);
}

#[test]
fn test_slot_backoff() {
#[cfg_attr(coverage, coverage(off))]
Expand Down
71 changes: 69 additions & 2 deletions rust/lance-core/src/utils/bit.rs
Original file line number Diff line number Diff line change
@@ -1,20 +1,61 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright The Lance Authors

/// Returns true if the given number is a power of two.
///
/// Zero is not a power of two, so `is_pwr_two(0)` returns `false`.
///
/// ```
/// use lance_core::utils::bit::is_pwr_two;
///
/// assert!(is_pwr_two(1));
/// assert!(is_pwr_two(2));
/// assert!(is_pwr_two(1024));
/// assert!(!is_pwr_two(0));
/// assert!(!is_pwr_two(3));
/// assert!(!is_pwr_two(1000));
/// ```
pub fn is_pwr_two(n: u64) -> bool {
    // The hand-rolled `n & (n - 1) == 0` underflows for `n == 0`: it panics in
    // debug builds and reports `true` in release builds. The standard library
    // check handles zero correctly.
    n.is_power_of_two()
}

/// Computes how many bytes must be appended after `n` bytes to reach the next
/// multiple of `ALIGN`.
///
/// `ALIGN` is expected to be a power of two; this is only checked in debug
/// builds. An `n` that is already aligned needs no padding.
///
/// ```
/// use lance_core::utils::bit::pad_bytes;
///
/// assert_eq!(pad_bytes::<8>(0), 0);
/// assert_eq!(pad_bytes::<8>(1), 7);
/// assert_eq!(pad_bytes::<8>(8), 0);
/// assert_eq!(pad_bytes::<8>(9), 7);
/// ```
pub fn pad_bytes<const ALIGN: usize>(n: usize) -> usize {
    debug_assert!(is_pwr_two(ALIGN as u64));
    // For a power-of-two alignment, `ALIGN - 1` masks out the low bits.
    let mask = ALIGN - 1;
    let remainder = n & mask;
    // The final mask maps the "already aligned" case (ALIGN - 0) back to 0.
    (ALIGN - remainder) & mask
}

/// Computes how many bytes must be appended after `n` bytes to reach the next
/// multiple of `align`, where the alignment is supplied at runtime.
///
/// `align` is expected to be a power of two; this is only checked in debug
/// builds. An `n` that is already aligned needs no padding.
///
/// ```
/// use lance_core::utils::bit::pad_bytes_to;
///
/// assert_eq!(pad_bytes_to(0, 8), 0);
/// assert_eq!(pad_bytes_to(1, 8), 7);
/// assert_eq!(pad_bytes_to(8, 8), 0);
/// assert_eq!(pad_bytes_to(9, 8), 7);
/// ```
pub fn pad_bytes_to(n: usize, align: usize) -> usize {
    debug_assert!(is_pwr_two(align as u64));
    // For a power-of-two alignment, `align - 1` masks out the low bits.
    let low_bits = align - 1;
    let past_boundary = n & low_bits;
    // The final mask maps the "already aligned" case (align - 0) back to 0.
    (align - past_boundary) & low_bits
}

/// Returns the number of padding bytes needed to align `n` to `ALIGN` (u64 version).
///
/// ```
/// use lance_core::utils::bit::pad_bytes_u64;
///
/// assert_eq!(pad_bytes_u64::<8>(0), 0);
/// assert_eq!(pad_bytes_u64::<8>(1), 7);
/// assert_eq!(pad_bytes_u64::<8>(8), 0);
/// assert_eq!(pad_bytes_u64::<8>(9), 7);
/// ```
pub fn pad_bytes_u64<const ALIGN: u64>(n: u64) -> u64 {
debug_assert!(is_pwr_two(ALIGN));
(ALIGN - (n & (ALIGN - 1))) & (ALIGN - 1)
Expand All @@ -32,9 +73,18 @@ const LOG_TABLE_256: [u8; 256] = [
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
];

/// Returns the number of bits needed to represent the given number.
///
/// Inspired by <https://graphics.stanford.edu/~seander/bithacks.html>
///
/// ```
/// use lance_core::utils::bit::log_2_ceil;
///
/// assert_eq!(log_2_ceil(1), 1);
/// assert_eq!(log_2_ceil(2), 2);
/// assert_eq!(log_2_ceil(255), 8);
/// assert_eq!(log_2_ceil(256), 9);
/// ```
pub fn log_2_ceil(val: u32) -> u32 {
assert!(val > 0);
let upper_half = val >> 16;
Expand All @@ -61,10 +111,24 @@ pub fn log_2_ceil(val: u32) -> u32 {

#[cfg(test)]
pub mod tests {
use crate::utils::bit::log_2_ceil;
use crate::utils::bit::{is_pwr_two, log_2_ceil, pad_bytes, pad_bytes_to, pad_bytes_u64};

#[test]
fn test_bit_utils() {
    // Power-of-two detection on a few additional inputs.
    for power in [4, 1024] {
        assert!(is_pwr_two(power));
    }
    assert!(!is_pwr_two(5));

    // A 64-byte alignment (the doctests only use 8): 100 bytes need 28 bytes
    // of padding, and all three helpers must agree on that.
    let from_const_generic = pad_bytes::<64>(100);
    let from_runtime_align = pad_bytes_to(100, 64);
    let from_u64_variant = pad_bytes_u64::<64>(100);
    assert_eq!(from_const_generic, 28);
    assert_eq!(from_runtime_align, 28);
    assert_eq!(from_u64_variant, 28);
}

#[test]
fn test_log_2_ceil() {
#[cfg_attr(coverage, coverage(off))]
fn classic_approach(mut val: u32) -> u32 {
let mut counter = 0;
while val > 0 {
Expand All @@ -82,5 +146,8 @@ pub mod tests {
log_2_ceil(1024 * 1024 * 1024),
classic_approach(1024 * 1024 * 1024)
);
// Cover the branch where upper_half != 0 but first_quarter == 0
// (value between 2^16 and 2^24)
assert_eq!(log_2_ceil(100_000), classic_approach(100_000));
}
}
36 changes: 33 additions & 3 deletions rust/lance-core/src/utils/hash.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,25 @@

use std::hash::Hasher;

// A wrapper for &[u8] to allow &[u8] as hash keys,
// the equality for this `U8SliceKey` means that the &[u8] contents are equal.
#[derive(Eq)]
/// A wrapper for `&[u8]` to allow byte slices as hash keys.
///
/// ```
/// use lance_core::utils::hash::U8SliceKey;
/// use std::collections::HashMap;
///
/// let mut map: HashMap<U8SliceKey, i32> = HashMap::new();
/// map.insert(U8SliceKey(&[1, 2, 3]), 42);
///
/// assert_eq!(map.get(&U8SliceKey(&[1, 2, 3])), Some(&42));
/// assert_eq!(map.get(&U8SliceKey(&[1, 2, 4])), None);
///
/// // Equality is based on slice contents
/// assert_eq!(U8SliceKey(&[1, 2, 3]), U8SliceKey(&[1, 2, 3]));
/// assert_ne!(U8SliceKey(&[1, 2, 3]), U8SliceKey(&[1, 2, 4]));
/// ```
#[derive(Debug, Eq)]
pub struct U8SliceKey<'a>(pub &'a [u8]);

impl PartialEq for U8SliceKey<'_> {
fn eq(&self, other: &Self) -> bool {
self.0 == other.0
Expand All @@ -18,3 +33,18 @@ impl std::hash::Hash for U8SliceKey<'_> {
self.0.hash(state);
}
}

#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::HashMap;

    #[test]
    fn test_u8_slice_key() {
        // Populate a map with a single key.
        let mut lookup = HashMap::new();
        lookup.insert(U8SliceKey(&[1, 2, 3]), 42);

        // A key with different contents must not be found.
        let absent_key = U8SliceKey(&[4, 5, 6]);
        assert_eq!(lookup.get(&absent_key), None);

        // Keys with different contents compare unequal.
        let one = U8SliceKey(&[1]);
        let two = U8SliceKey(&[2]);
        assert_ne!(one, two);
    }
}
Loading