diff --git a/rust/lance-core/src/utils/address.rs b/rust/lance-core/src/utils/address.rs index 6b0ba882d69..37512ca1e04 100644 --- a/rust/lance-core/src/utils/address.rs +++ b/rust/lance-core/src/utils/address.rs @@ -3,14 +3,31 @@ use std::ops::Range; +/// A row address encodes a fragment ID (upper 32 bits) and row offset (lower 32 bits). +/// +/// ``` +/// use lance_core::utils::address::RowAddress; +/// +/// let addr = RowAddress::new_from_parts(5, 100); +/// assert_eq!(addr.fragment_id(), 5); +/// assert_eq!(addr.row_offset(), 100); +/// +/// // Convert to/from u64 +/// let raw: u64 = addr.into(); +/// let addr2: RowAddress = raw.into(); +/// assert_eq!(addr, addr2); +/// +/// // Display format +/// assert_eq!(format!("{}", addr), "(5, 100)"); +/// ``` #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] pub struct RowAddress(u64); impl RowAddress { pub const FRAGMENT_SIZE: u64 = 1 << 32; - // A fragment id that will never be used + /// A fragment id that will never be used. pub const TOMBSTONE_FRAG: u32 = 0xffffffff; - // A row id that will never be used + /// A row id that will never be used. pub const TOMBSTONE_ROW: u64 = 0xffffffffffffffff; pub fn new_from_u64(row_addr: u64) -> Self { @@ -21,10 +38,20 @@ impl RowAddress { Self(((fragment_id as u64) << 32) | row_offset as u64) } + /// Returns the address for the first row of a fragment. pub fn first_row(fragment_id: u32) -> Self { Self::new_from_parts(fragment_id, 0) } + /// Returns the range of u64 addresses for a given fragment. 
+ /// + /// ``` + /// use lance_core::utils::address::RowAddress; + /// + /// let range = RowAddress::address_range(2); + /// assert_eq!(range.start, 2 * RowAddress::FRAGMENT_SIZE); + /// assert_eq!(range.end, 3 * RowAddress::FRAGMENT_SIZE); + /// ``` pub fn address_range(fragment_id: u32) -> Range<u64> { u64::from(Self::first_row(fragment_id))..u64::from(Self::first_row(fragment_id + 1)) } @@ -61,3 +88,29 @@ impl std::fmt::Display for RowAddress { write!(f, "({}, {})", self.fragment_id(), self.row_offset()) } } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_row_address() { + // new_from_u64 (not in doctest) + let addr = RowAddress::new_from_u64(0x0000_0001_0000_0002); + assert_eq!(addr.fragment_id(), 1); + assert_eq!(addr.row_offset(), 2); + + // address_range uses first_row internally (coverage) + let range = RowAddress::address_range(3); + assert_eq!(range.start, 3 * RowAddress::FRAGMENT_SIZE); + + // From impls with different values than doctest + let addr2 = RowAddress::new_from_parts(7, 8); + let raw: u64 = addr2.into(); + let addr3: RowAddress = raw.into(); + assert_eq!(addr2, addr3); + + // Debug format (doctest only tests Display) + assert_eq!(format!("{:?}", addr), "(1, 2)"); + } +} diff --git a/rust/lance-core/src/utils/backoff.rs b/rust/lance-core/src/utils/backoff.rs index c9d9009a7b3..b30c757bb23 100644 --- a/rust/lance-core/src/utils/backoff.rs +++ b/rust/lance-core/src/utils/backoff.rs @@ -162,6 +162,44 @@ mod tests { assert_eq!(backoff.attempt(), 4); } + #[test] + fn test_backoff_with_base() { + let mut backoff = Backoff::default().with_base(3).with_jitter(0); + assert_eq!(backoff.next_backoff().as_millis(), 50); // 3^0 * 50 + assert_eq!(backoff.next_backoff().as_millis(), 150); // 3^1 * 50 + assert_eq!(backoff.next_backoff().as_millis(), 450); // 3^2 * 50 + } + + #[test] + fn test_backoff_with_unit() { + let mut backoff = Backoff::default().with_unit(100).with_jitter(0); + assert_eq!(backoff.next_backoff().as_millis(), 100); // 2^0 
* 100 + assert_eq!(backoff.next_backoff().as_millis(), 200); // 2^1 * 100 + } + + #[test] + fn test_backoff_with_min() { + let mut backoff = Backoff::default().with_min(100).with_jitter(0); + assert_eq!(backoff.next_backoff().as_millis(), 100); // clamped to min + } + + #[test] + fn test_backoff_with_max() { + let mut backoff = Backoff::default().with_max(75).with_jitter(0); + assert_eq!(backoff.next_backoff().as_millis(), 50); + assert_eq!(backoff.next_backoff().as_millis(), 75); // clamped to max + } + + #[test] + fn test_backoff_reset() { + let mut backoff = Backoff::default().with_jitter(0); + assert_eq!(backoff.next_backoff().as_millis(), 50); + assert_eq!(backoff.attempt(), 1); + backoff.reset(); + assert_eq!(backoff.attempt(), 0); + assert_eq!(backoff.next_backoff().as_millis(), 50); + } + #[test] fn test_slot_backoff() { #[cfg_attr(coverage, coverage(off))] diff --git a/rust/lance-core/src/utils/bit.rs b/rust/lance-core/src/utils/bit.rs index 9299d7d9e89..ba4b882691d 100644 --- a/rust/lance-core/src/utils/bit.rs +++ b/rust/lance-core/src/utils/bit.rs @@ -1,20 +1,61 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright The Lance Authors +/// Returns true if the given number is a power of two. +/// +/// ``` +/// use lance_core::utils::bit::is_pwr_two; +/// +/// assert!(is_pwr_two(1)); +/// assert!(is_pwr_two(2)); +/// assert!(is_pwr_two(1024)); +/// assert!(!is_pwr_two(3)); +/// assert!(!is_pwr_two(1000)); +/// ``` pub fn is_pwr_two(n: u64) -> bool { n & (n - 1) == 0 } +/// Returns the number of padding bytes needed to align `n` to `ALIGN`. 
+/// +/// ``` +/// use lance_core::utils::bit::pad_bytes; +/// +/// assert_eq!(pad_bytes::<8>(0), 0); +/// assert_eq!(pad_bytes::<8>(1), 7); +/// assert_eq!(pad_bytes::<8>(8), 0); +/// assert_eq!(pad_bytes::<8>(9), 7); +/// ``` pub fn pad_bytes<const ALIGN: usize>(n: usize) -> usize { debug_assert!(is_pwr_two(ALIGN as u64)); (ALIGN - (n & (ALIGN - 1))) & (ALIGN - 1) } +/// Returns the number of padding bytes needed to align `n` to `align`. +/// +/// ``` +/// use lance_core::utils::bit::pad_bytes_to; +/// +/// assert_eq!(pad_bytes_to(0, 8), 0); +/// assert_eq!(pad_bytes_to(1, 8), 7); +/// assert_eq!(pad_bytes_to(8, 8), 0); +/// assert_eq!(pad_bytes_to(9, 8), 7); +/// ``` pub fn pad_bytes_to(n: usize, align: usize) -> usize { debug_assert!(is_pwr_two(align as u64)); (align - (n & (align - 1))) & (align - 1) } +/// Returns the number of padding bytes needed to align `n` to `ALIGN` (u64 version). +/// +/// ``` +/// use lance_core::utils::bit::pad_bytes_u64; +/// +/// assert_eq!(pad_bytes_u64::<8>(0), 0); +/// assert_eq!(pad_bytes_u64::<8>(1), 7); +/// assert_eq!(pad_bytes_u64::<8>(8), 0); +/// assert_eq!(pad_bytes_u64::<8>(9), 7); +/// ``` pub fn pad_bytes_u64<const ALIGN: u64>(n: u64) -> u64 { debug_assert!(is_pwr_two(ALIGN)); (ALIGN - (n & (ALIGN - 1))) & (ALIGN - 1) @@ -32,9 +73,18 @@ const LOG_TABLE_256: [u8; 256] = [ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, ]; -/// Returns the number of bits needed to represent the given number +/// Returns the number of bits needed to represent the given number. 
/// /// Inspired by <https://graphics.stanford.edu/~seander/bithacks.html> +/// +/// ``` +/// use lance_core::utils::bit::log_2_ceil; +/// +/// assert_eq!(log_2_ceil(1), 1); +/// assert_eq!(log_2_ceil(2), 2); +/// assert_eq!(log_2_ceil(255), 8); +/// assert_eq!(log_2_ceil(256), 9); +/// ``` pub fn log_2_ceil(val: u32) -> u32 { assert!(val > 0); let upper_half = val >> 16; @@ -61,10 +111,24 @@ pub fn log_2_ceil(val: u32) -> u32 { #[cfg(test)] pub mod tests { - use crate::utils::bit::log_2_ceil; + use crate::utils::bit::{is_pwr_two, log_2_ceil, pad_bytes, pad_bytes_to, pad_bytes_u64}; + + #[test] + fn test_bit_utils() { + // Test values not in doctests + assert!(is_pwr_two(4)); + assert!(is_pwr_two(1024)); + assert!(!is_pwr_two(5)); + + // Test different alignment (64) not shown in doctests + assert_eq!(pad_bytes::<64>(100), 28); + assert_eq!(pad_bytes_to(100, 64), 28); + assert_eq!(pad_bytes_u64::<64>(100), 28); + } #[test] fn test_log_2_ceil() { + #[cfg_attr(coverage, coverage(off))] fn classic_approach(mut val: u32) -> u32 { let mut counter = 0; while val > 0 { @@ -82,5 +146,8 @@ pub mod tests { log_2_ceil(1024 * 1024 * 1024), classic_approach(1024 * 1024 * 1024) ); + // Cover the branch where upper_half != 0 but first_quarter == 0 + // (value between 2^16 and 2^24) + assert_eq!(log_2_ceil(100_000), classic_approach(100_000)); } } diff --git a/rust/lance-core/src/utils/hash.rs b/rust/lance-core/src/utils/hash.rs index 14ef805a58f..a09e2d2c1ed 100644 --- a/rust/lance-core/src/utils/hash.rs +++ b/rust/lance-core/src/utils/hash.rs @@ -3,10 +3,25 @@ use std::hash::Hasher; -// A wrapper for &[u8] to allow &[u8] as hash keys, -// the equality for this `U8SliceKey` means that the &[u8] contents are equal. -#[derive(Eq)] +/// A wrapper for `&[u8]` to allow byte slices as hash keys. 
+/// +/// ``` +/// use lance_core::utils::hash::U8SliceKey; +/// use std::collections::HashMap; +/// +/// let mut map: HashMap<U8SliceKey, i32> = HashMap::new(); +/// map.insert(U8SliceKey(&[1, 2, 3]), 42); +/// +/// assert_eq!(map.get(&U8SliceKey(&[1, 2, 3])), Some(&42)); +/// assert_eq!(map.get(&U8SliceKey(&[1, 2, 4])), None); +/// +/// // Equality is based on slice contents +/// assert_eq!(U8SliceKey(&[1, 2, 3]), U8SliceKey(&[1, 2, 3])); +/// assert_ne!(U8SliceKey(&[1, 2, 3]), U8SliceKey(&[1, 2, 4])); +/// ``` +#[derive(Debug, Eq)] pub struct U8SliceKey<'a>(pub &'a [u8]); + impl PartialEq for U8SliceKey<'_> { fn eq(&self, other: &Self) -> bool { self.0 == other.0 @@ -18,3 +33,18 @@ impl std::hash::Hash for U8SliceKey<'_> { self.0.hash(state); } } + +#[cfg(test)] +mod tests { + use super::*; + use std::collections::HashMap; + + #[test] + fn test_u8_slice_key() { + // Test cases not in doctest: key not found, inequality + let mut map = HashMap::new(); + map.insert(U8SliceKey(&[1, 2, 3]), 42); + assert_eq!(map.get(&U8SliceKey(&[4, 5, 6])), None); + assert_ne!(U8SliceKey(&[1]), U8SliceKey(&[2])); + } +}