diff --git a/README.md b/README.md index 499c9729..1c93fcb0 100644 --- a/README.md +++ b/README.md @@ -23,6 +23,7 @@ For more information about the philosophy behind the library, see [Roman's tutor ## Available Data Structures - [BTreeMap]: A Key-Value store +- [BTreeSet]: A set of unique elements - [Vec]: A growable array - [Log]: An append-only list of variable-size entries - [Cell]: A serializable value @@ -38,7 +39,9 @@ Stable structures are able to work directly in stable memory because each data s its own memory. When initializing a stable structure, a memory is provided that the data structure can use to store its data. -Here's a basic example: +Here are some basic examples: + +### Example: BTreeMap ```rust use ic_stable_structures::{BTreeMap, DefaultMemoryImpl}; @@ -54,12 +57,25 @@ This includes stable memory, a vector ([VectorMemory]), or even a flat file ([Fi The example above initializes a [BTreeMap] with a [DefaultMemoryImpl], which maps to stable memory when used in a canister and to a [VectorMemory] otherwise. +### Example: BTreeSet + +The `BTreeSet` is a stable set implementation based on a B-Tree. It allows efficient insertion, deletion, and lookup of unique elements. + +```rust +use ic_stable_structures::{BTreeSet, DefaultMemoryImpl}; +let mut set: BTreeSet<u64, _> = BTreeSet::new(DefaultMemoryImpl::default()); + +set.insert(42); +assert!(set.contains(&42)); +assert_eq!(set.pop_first(), Some(42)); +assert!(set.is_empty()); +``` + Note that **stable structures cannot share memories.** Each memory must belong to only one stable structure. For example, this fails when run in a canister: - ```no_run use ic_stable_structures::{BTreeMap, DefaultMemoryImpl}; let mut map_1: BTreeMap<u64, u64, _> = BTreeMap::init(DefaultMemoryImpl::default()); diff --git a/src/btreeset.rs b/src/btreeset.rs new file mode 100644 index 00000000..63133c2e --- /dev/null +++ b/src/btreeset.rs @@ -0,0 +1,1073 @@ +//! This module implements a set based on a B-Tree in stable memory. 
+
+use crate::{btreemap::Iter as IterMap, BTreeMap, Memory, Storable};
+use core::ops::RangeBounds;
+
+#[cfg(test)]
+mod proptests;
+
+/// An iterator over the entries of a [`BTreeSet`].
+///
+/// It wraps the iterator of the underlying map and yields only the keys,
+/// dropping the unit values stored alongside them.
+pub struct Iter<'a, K, M>
+where
+    K: Storable + Ord + Clone,
+    M: Memory,
+{
+    iter_internal: IterMap<'a, K, (), M>,
+}
+
+impl<'a, K, M> Iter<'a, K, M>
+where
+    K: Storable + Ord + Clone,
+    M: Memory,
+{
+    /// Wraps a map iterator so that it can be exposed as a set iterator.
+    fn new(iter: IterMap<'a, K, (), M>) -> Self {
+        Iter {
+            iter_internal: iter,
+        }
+    }
+}
+
+impl<K, M> Iterator for Iter<'_, K, M>
+where
+    K: Storable + Ord + Clone,
+    M: Memory,
+{
+    type Item = K;
+
+    /// Advances the underlying map iterator and returns the key of the next entry.
+    fn next(&mut self) -> Option<Self::Item> {
+        self.iter_internal.next().map(|(key, _)| key)
+    }
+}
+
+/// A B-Tree set implementation that stores its data into a designated memory.
+///
+/// # Overview
+///
+/// A `BTreeSet` is a "stable" set implementation based on a B-tree, designed to work directly in stable memory.
+///
+/// # Memory Implementations
+///
+/// `BTreeSet` works with any memory implementation that satisfies the [`Memory`] trait:
+///
+/// - [`Ic0StableMemory`](crate::Ic0StableMemory): Stores data in the Internet Computer's stable memory.
+/// - [`VectorMemory`](crate::VectorMemory): In-memory implementation backed by a Rust `Vec`.
+/// - [`FileMemory`](crate::FileMemory): Persists data to disk using a file.
+/// - [`DefaultMemoryImpl`](crate::DefaultMemoryImpl): Automatically selects the appropriate memory backend
+///   based on the environment:
+///   - Uses `Ic0StableMemory` when running in an Internet Computer canister (wasm32 target).
+///   - Falls back to `VectorMemory` in other environments (like tests or non-IC contexts).
+///
+/// For most use cases, [`DefaultMemoryImpl`](crate::DefaultMemoryImpl) is recommended as it provides
+/// the right implementation based on the runtime context.
+///
+/// # Examples
+///
+/// ## Basic Usage
+///
+/// ```rust
+/// use ic_stable_structures::{BTreeSet, DefaultMemoryImpl};
+///
+/// let mut set: BTreeSet<u64, _> = BTreeSet::new(DefaultMemoryImpl::default());
+///
+/// set.insert(42);
+/// assert!(set.contains(&42));
+/// assert_eq!(set.pop_first(), Some(42));
+/// assert!(set.is_empty());
+/// ```
+///
+/// ## Range Queries
+///
+/// ```rust
+/// use ic_stable_structures::{BTreeSet, DefaultMemoryImpl};
+///
+/// let mut set: BTreeSet<u64, _> = BTreeSet::new(DefaultMemoryImpl::default());
+/// set.insert(1);
+/// set.insert(2);
+/// set.insert(3);
+///
+/// let range: Vec<_> = set.range(2..).collect();
+/// assert_eq!(range, vec![2, 3]);
+/// ```
+///
+/// ## Custom Types
+///
+/// You can store custom types in a `BTreeSet` by implementing the `Storable` trait:
+///
+/// ```rust
+/// use ic_stable_structures::{BTreeSet, DefaultMemoryImpl, Storable};
+/// use std::borrow::Cow;
+///
+/// #[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone)]
+/// struct CustomType {
+///     id: u64,
+/// }
+///
+/// impl Storable for CustomType {
+///     fn to_bytes(&self) -> Cow<[u8]> {
+///         Cow::Owned(self.id.to_le_bytes().to_vec())
+///     }
+///
+///     fn from_bytes(bytes: Cow<[u8]>) -> Self {
+///         let id = u64::from_le_bytes(bytes.as_ref().try_into().unwrap());
+///         CustomType { id }
+///     }
+///
+///     const BOUND: ic_stable_structures::storable::Bound =
+///         ic_stable_structures::storable::Bound::Bounded {
+///             max_size: 8,
+///             is_fixed_size: true,
+///         };
+/// }
+///
+/// let mut set: BTreeSet<CustomType, _> = BTreeSet::new(DefaultMemoryImpl::default());
+/// set.insert(CustomType { id: 42 });
+/// assert!(set.contains(&CustomType { id: 42 }));
+/// ```
+///
+/// ### Bounded vs Unbounded Types
+///
+/// When implementing `Storable`, you must specify whether your type is bounded or unbounded:
+///
+/// - **Unbounded (`Bound::Unbounded`)**:
+///   - Use when your type's serialized size can vary or has no fixed maximum.
+///   - Recommended for most custom types, especially those containing Strings or Vecs.
+///   - Example: `const BOUND: Bound = Bound::Unbounded;`
+///
+/// - **Bounded (`Bound::Bounded{ max_size, is_fixed_size }`)**:
+///   - Use when you know the maximum serialized size of your type.
+///   - Enables memory optimizations in the `BTreeSet`.
+///   - Example: `const BOUND: Bound = Bound::Bounded { max_size: 100, is_fixed_size: false };`
+///   - For types with truly fixed size (like primitive types), set `is_fixed_size: true`.
+///
+/// If unsure, use `Bound::Unbounded` as it's the safer choice.
+///
+/// # Warning
+///
+/// Once you've deployed with a bounded type, you cannot increase its `max_size` in
+/// future versions without risking data corruption. You can, however, migrate from a bounded type
+/// to an unbounded type if needed. For evolving data structures, prefer `Bound::Unbounded`.
+pub struct BTreeSet<K, M>
+where
+    K: Storable + Ord + Clone,
+    M: Memory,
+{
+    // The underlying implementation uses a BTreeMap with unit values.
+    // This design allows us to reuse the existing BTreeMap implementation.
+    // However, if needed, this could be optimized in the future to avoid
+    // the overhead of storing unit values.
+    map: BTreeMap<K, (), M>,
+}
+
+impl<K, M> BTreeSet<K, M>
+where
+    K: Storable + Ord + Clone,
+    M: Memory,
+{
+    /// Initializes a `BTreeSet`.
+    ///
+    /// If the memory provided already contains a `BTreeSet`, then that
+    /// set is loaded. Otherwise, a new `BTreeSet` instance is created.
+    ///
+    /// # Example
+    ///
+    /// ```rust
+    /// use ic_stable_structures::{BTreeSet, DefaultMemoryImpl};
+    ///
+    /// let set: BTreeSet<u64, _> = BTreeSet::init(DefaultMemoryImpl::default());
+    /// ```
+    pub fn init(memory: M) -> Self {
+        BTreeSet {
+            map: BTreeMap::<K, (), M>::init(memory),
+        }
+    }
+
+    /// Creates a new instance of a `BTreeSet`.
+    ///
+    /// # Example
+    ///
+    /// ```rust
+    /// use ic_stable_structures::{BTreeSet, DefaultMemoryImpl};
+    ///
+    /// let set: BTreeSet<u64, _> = BTreeSet::new(DefaultMemoryImpl::default());
+    /// ```
+    pub fn new(memory: M) -> Self {
+        BTreeSet {
+            map: BTreeMap::<K, (), M>::new(memory),
+        }
+    }
+
+    /// Loads the `BTreeSet` from memory.
+    ///
+    /// # Example
+    ///
+    /// ```rust
+    /// use ic_stable_structures::{BTreeSet, DefaultMemoryImpl};
+    ///
+    /// let mut set: BTreeSet<u64, _> = BTreeSet::new(DefaultMemoryImpl::default());
+    /// set.insert(42);
+    ///
+    /// // Take back the memory that backs the set
+    /// let memory = set.into_memory();
+    ///
+    /// // Load the set from that memory
+    /// let loaded_set: BTreeSet<u64, _> = BTreeSet::load(memory);
+    /// assert!(loaded_set.contains(&42));
+    /// ```
+    pub fn load(memory: M) -> Self {
+        BTreeSet {
+            map: BTreeMap::<K, (), M>::load(memory),
+        }
+    }
+
+    /// Inserts a key into the set. Returns `true` if the key
+    /// did not exist in the set before.
+    ///
+    /// # Example
+    ///
+    /// ```rust
+    /// use ic_stable_structures::{BTreeSet, DefaultMemoryImpl};
+    ///
+    /// let mut set: BTreeSet<u64, _> = BTreeSet::new(DefaultMemoryImpl::default());
+    /// assert!(set.insert(42));
+    /// assert!(!set.insert(42)); // Key already exists
+    /// ```
+    pub fn insert(&mut self, key: K) -> bool {
+        // The map returns the previous value; `None` means the key is new.
+        self.map.insert(key, ()).is_none()
+    }
+
+    /// Returns `true` if the key exists in the set, `false` otherwise.
+    ///
+    /// # Example
+    ///
+    /// ```rust
+    /// use ic_stable_structures::{BTreeSet, DefaultMemoryImpl};
+    ///
+    /// let mut set: BTreeSet<u64, _> = BTreeSet::new(DefaultMemoryImpl::default());
+    /// set.insert(42);
+    /// assert!(set.contains(&42));
+    /// assert!(!set.contains(&7));
+    /// ```
+    pub fn contains(&self, key: &K) -> bool {
+        self.map.get(key).is_some()
+    }
+
+    /// Returns `true` if the set contains no elements.
+    ///
+    /// # Example
+    ///
+    /// ```rust
+    /// use ic_stable_structures::{BTreeSet, DefaultMemoryImpl};
+    ///
+    /// let set: BTreeSet<u64, _> = BTreeSet::new(DefaultMemoryImpl::default());
+    /// assert!(set.is_empty());
+    /// ```
+    pub fn is_empty(&self) -> bool {
+        self.map.is_empty()
+    }
+
+    /// Returns the number of elements in the set.
+    ///
+    /// # Example
+    ///
+    /// ```rust
+    /// use ic_stable_structures::{BTreeSet, DefaultMemoryImpl};
+    ///
+    /// let mut set: BTreeSet<u64, _> = BTreeSet::new(DefaultMemoryImpl::default());
+    /// set.insert(42);
+    /// set.insert(7);
+    /// assert_eq!(set.len(), 2);
+    /// ```
+    pub fn len(&self) -> u64 {
+        self.map.len()
+    }
+
+    /// Consumes the set and returns the underlying memory.
+    ///
+    /// # Example
+    ///
+    /// ```rust
+    /// use ic_stable_structures::{BTreeSet, DefaultMemoryImpl};
+    ///
+    /// let set: BTreeSet<u64, _> = BTreeSet::new(DefaultMemoryImpl::default());
+    /// let memory = set.into_memory();
+    /// ```
+    pub fn into_memory(self) -> M {
+        self.map.into_memory()
+    }
+
+    /// Removes all elements from the set.
+    ///
+    /// # Example
+    ///
+    /// ```rust
+    /// use ic_stable_structures::{BTreeSet, DefaultMemoryImpl};
+    ///
+    /// let mut set: BTreeSet<u64, _> = BTreeSet::new(DefaultMemoryImpl::default());
+    /// set.insert(42);
+    /// set.clear();
+    /// assert!(set.is_empty());
+    /// ```
+    pub fn clear(&mut self) {
+        self.map.clear_new();
+    }
+
+    /// Returns the first key in the set. This key
+    /// is the minimum key in the set.
+    ///
+    /// Returns `None` if the set is empty.
+    ///
+    /// # Example
+    ///
+    /// ```rust
+    /// use ic_stable_structures::{BTreeSet, DefaultMemoryImpl};
+    ///
+    /// let mut set: BTreeSet<u64, _> = BTreeSet::new(DefaultMemoryImpl::default());
+    /// set.insert(42);
+    /// set.insert(7);
+    /// assert_eq!(set.first(), Some(7));
+    /// ```
+    pub fn first(&self) -> Option<K> {
+        self.map.first_key_value().map(|(key, _)| key)
+    }
+
+    /// Returns the last key in the set. This key
+    /// is the maximum key in the set.
+    ///
+    /// # Example
+    ///
+    /// ```rust
+    /// use ic_stable_structures::{BTreeSet, DefaultMemoryImpl};
+    ///
+    /// let mut set: BTreeSet<u64, _> = BTreeSet::new(DefaultMemoryImpl::default());
+    /// set.insert(42);
+    /// set.insert(7);
+    /// assert_eq!(set.last(), Some(42));
+    /// ```
+    pub fn last(&self) -> Option<K> {
+        self.map.last_key_value().map(|(key, _)| key)
+    }
+
+    /// Removes a key from the set, returning `true` if it was present
+    /// and `false` otherwise.
+    ///
+    /// # Example
+    ///
+    /// ```rust
+    /// use ic_stable_structures::{BTreeSet, DefaultMemoryImpl};
+    ///
+    /// let mut set: BTreeSet<u64, _> = BTreeSet::new(DefaultMemoryImpl::default());
+    /// set.insert(42);
+    /// assert!(set.remove(&42));
+    /// assert!(!set.contains(&42));
+    /// ```
+    pub fn remove(&mut self, key: &K) -> bool {
+        self.map.remove(key).is_some()
+    }
+
+    /// Removes and returns the last element in the set. The key of this element is the maximum key that was in the set.
+    ///
+    /// Returns `None` if the set is empty.
+    ///
+    /// # Example
+    ///
+    /// ```rust
+    /// use ic_stable_structures::{BTreeSet, DefaultMemoryImpl};
+    ///
+    /// let mut set: BTreeSet<u64, _> = BTreeSet::new(DefaultMemoryImpl::default());
+    /// set.insert(42);
+    /// set.insert(7);
+    /// assert_eq!(set.pop_last(), Some(42));
+    /// ```
+    pub fn pop_last(&mut self) -> Option<K> {
+        self.map.pop_last().map(|(key, _)| key)
+    }
+
+    /// Removes and returns the first element in the set. The key of this element is the minimum key that was in the set.
+    ///
+    /// Returns `None` if the set is empty.
+    ///
+    /// # Example
+    ///
+    /// ```rust
+    /// use ic_stable_structures::{BTreeSet, DefaultMemoryImpl};
+    ///
+    /// let mut set: BTreeSet<u64, _> = BTreeSet::new(DefaultMemoryImpl::default());
+    /// set.insert(42);
+    /// set.insert(7);
+    /// assert_eq!(set.pop_first(), Some(7));
+    /// ```
+    pub fn pop_first(&mut self) -> Option<K> {
+        self.map.pop_first().map(|(key, _)| key)
+    }
+
+    /// Returns an iterator over the entries of the set, sorted by key.
+    ///
+    /// # Example
+    ///
+    /// ```rust
+    /// use ic_stable_structures::{BTreeSet, DefaultMemoryImpl};
+    ///
+    /// let mut set: BTreeSet<u64, _> = BTreeSet::new(DefaultMemoryImpl::default());
+    /// set.insert(42);
+    /// set.insert(7);
+    /// for key in set.iter() {
+    ///     println!("{}", key);
+    /// }
+    /// ```
+    pub fn iter(&self) -> Iter<'_, K, M> {
+        Iter::new(self.map.iter())
+    }
+
+    /// Returns an iterator over the entries in the set where keys
+    /// belong to the specified range.
+    ///
+    /// # Example
+    ///
+    /// ```rust
+    /// use ic_stable_structures::{BTreeSet, DefaultMemoryImpl};
+    ///
+    /// let mut set: BTreeSet<u64, _> = BTreeSet::new(DefaultMemoryImpl::default());
+    /// set.insert(1);
+    /// set.insert(2);
+    /// set.insert(3);
+    /// let range: Vec<_> = set.range(2..).collect();
+    /// assert_eq!(range, vec![2, 3]);
+    /// ```
+    pub fn range(&self, key_range: impl RangeBounds<K>) -> Iter<'_, K, M> {
+        Iter::new(self.map.range(key_range))
+    }
+
+    /// Returns an iterator positioned at the largest element strictly below the given bound.
+    /// Returns an empty iterator if there are no keys strictly below the given bound.
+    ///
+    /// # Example
+    ///
+    /// ```rust
+    /// use ic_stable_structures::{BTreeSet, DefaultMemoryImpl};
+    ///
+    /// let mut set: BTreeSet<u64, _> = BTreeSet::new(DefaultMemoryImpl::default());
+    /// set.insert(1);
+    /// set.insert(2);
+    /// set.insert(3);
+    ///
+    /// let upper_bound: Option<u64> = set.iter_upper_bound(&3).next();
+    /// assert_eq!(upper_bound, Some(2));
+    /// ```
+    pub fn iter_upper_bound(&self, bound: &K) -> Iter<'_, K, M> {
+        Iter::new(self.map.iter_upper_bound(bound))
+    }
+
+    /// Returns an iterator over the union of this set and another.
+    ///
+    /// The union of two sets is a set containing all elements that are in either set.
+    ///
+    /// Both sets are walked once in key order and merged, so an element that is
+    /// present in both sets is yielded only once.
+    ///
+    /// # Example
+    ///
+    /// ```rust
+    /// use ic_stable_structures::{BTreeSet, DefaultMemoryImpl};
+    ///
+    /// let mut set1: BTreeSet<u64, _> = BTreeSet::new(DefaultMemoryImpl::default());
+    /// let mut set2: BTreeSet<u64, _> = BTreeSet::new(DefaultMemoryImpl::default());
+    ///
+    /// set1.insert(1);
+    /// set1.insert(2);
+    /// set2.insert(2);
+    /// set2.insert(3);
+    ///
+    /// let union: Vec<_> = set1.union(&set2).collect();
+    /// assert_eq!(union, vec![1, 2, 3]);
+    /// ```
+    pub fn union<'a>(&'a self, other: &'a BTreeSet<K, M>) -> impl Iterator<Item = K> + 'a {
+        let mut iter_self = self.iter();
+        let mut iter_other = other.iter();
+        // The heads of both iterators are buffered so they can be compared
+        // before deciding which side to advance.
+        let mut next_self = iter_self.next();
+        let mut next_other = iter_other.next();
+
+        std::iter::from_fn(move || {
+            // Compare the buffered heads by reference; no element is cloned.
+            // An exhausted side is treated as "greater" so the other side drains.
+            let ordering = match (next_self.as_ref(), next_other.as_ref()) {
+                (Some(a), Some(b)) => a.cmp(b),
+                (Some(_), None) => std::cmp::Ordering::Less,
+                (None, Some(_)) => std::cmp::Ordering::Greater,
+                // Both iterators are exhausted: stop the iteration.
+                (None, None) => return None,
+            };
+
+            match ordering {
+                // The head of `self` is smaller: yield it and advance `self`.
+                std::cmp::Ordering::Less => std::mem::replace(&mut next_self, iter_self.next()),
+                // The head of `other` is smaller: yield it and advance `other`.
+                std::cmp::Ordering::Greater => {
+                    std::mem::replace(&mut next_other, iter_other.next())
+                }
+                // Equal heads: yield one of them and advance both iterators.
+                std::cmp::Ordering::Equal => {
+                    let item = std::mem::replace(&mut next_self, iter_self.next());
+                    next_other = iter_other.next();
+                    item
+                }
+            }
+        })
+    }
+
+    /// Returns an iterator over the intersection of this set and another.
+    ///
+    /// The intersection of two sets is a set containing only the elements that are in both sets.
+    ///
+    /// Both sets are walked once in key order; the side with the smaller head is
+    /// advanced until the heads match.
+    ///
+    /// # Example
+    ///
+    /// ```rust
+    /// use ic_stable_structures::{BTreeSet, DefaultMemoryImpl};
+    ///
+    /// let mut set1: BTreeSet<u64, _> = BTreeSet::new(DefaultMemoryImpl::default());
+    /// let mut set2: BTreeSet<u64, _> = BTreeSet::new(DefaultMemoryImpl::default());
+    ///
+    /// set1.insert(1);
+    /// set1.insert(2);
+    /// set1.insert(3);
+    ///
+    /// set2.insert(2);
+    /// set2.insert(3);
+    /// set2.insert(4);
+    ///
+    /// let intersection: Vec<_> = set1.intersection(&set2).collect();
+    /// assert_eq!(intersection, vec![2, 3]);
+    /// ```
+    pub fn intersection<'a>(&'a self, other: &'a BTreeSet<K, M>) -> impl Iterator<Item = K> + 'a {
+        let mut iter_self = self.iter();
+        let mut iter_other = other.iter();
+        let mut next_self = iter_self.next();
+        let mut next_other = iter_other.next();
+
+        std::iter::from_fn(move || loop {
+            // Compare the buffered heads by reference; no element is cloned.
+            let ordering = match (next_self.as_ref(), next_other.as_ref()) {
+                (Some(a), Some(b)) => a.cmp(b),
+                // Stop the iteration when either iterator is exhausted.
+                _ => return None,
+            };
+
+            match ordering {
+                // The head of `self` is smaller: it cannot be in `other`, skip it.
+                std::cmp::Ordering::Less => next_self = iter_self.next(),
+                // The head of `other` is smaller: it cannot be in `self`, skip it.
+                std::cmp::Ordering::Greater => next_other = iter_other.next(),
+                // Equal heads: yield one of them and advance both iterators.
+                std::cmp::Ordering::Equal => {
+                    let item = std::mem::replace(&mut next_self, iter_self.next());
+                    next_other = iter_other.next();
+                    return item;
+                }
+            }
+        })
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+    use crate::storable::Blob;
+    use crate::VectorMemory;
+    use std::cell::RefCell;
+    use std::rc::Rc;
+
+    /// Creates a new shared memory instance.
+ pub(crate) fn make_memory() -> Rc>> { + Rc::new(RefCell::new(Vec::new())) + } + + pub(crate) fn b(x: &[u8]) -> Blob<10> { + Blob::<10>::try_from(x).unwrap() + } + + /// A test runner that runs the test using `BTreeSet`. + pub fn run_btree_test(f: F) + where + K: Storable + Ord + Clone, + F: Fn(BTreeSet) -> R, + { + let mem = make_memory(); + let btree = BTreeSet::new(mem); + f(btree); + } + + #[test] + fn test_union_with_duplicates() { + let mem1 = make_memory(); + let mem2 = make_memory(); + let mut set1: BTreeSet = BTreeSet::new(mem1); + let mut set2: BTreeSet = BTreeSet::new(mem2); + + set1.insert(1); + set1.insert(2); + set1.insert(3); + + set2.insert(2); + set2.insert(3); + set2.insert(4); + + let union: Vec<_> = set1.union(&set2).collect(); + assert_eq!(union, vec![1, 2, 3, 4]); + } + + #[test] + fn test_intersection_with_duplicates() { + let mem1 = make_memory(); + let mem2 = make_memory(); + let mut set1: BTreeSet = BTreeSet::new(mem1); + let mut set2: BTreeSet = BTreeSet::new(mem2); + + set1.insert(1); + set1.insert(2); + set1.insert(3); + + set2.insert(2); + set2.insert(3); + set2.insert(4); + + let intersection: Vec<_> = set1.intersection(&set2).collect(); + assert_eq!(intersection, vec![2, 3]); + } + + #[test] + fn test_union_and_intersection_with_identical_sets() { + let mem1 = Rc::new(RefCell::new(Vec::new())); + let mem2 = Rc::new(RefCell::new(Vec::new())); + let mut set1: BTreeSet = BTreeSet::new(mem1); + let mut set2: BTreeSet = BTreeSet::new(mem2); + + for i in 0..100 { + set1.insert(i); + set2.insert(i); + } + + let union: Vec<_> = set1.union(&set2).collect(); + assert_eq!(union.len(), 100); + assert_eq!(union, (0..100).collect::>()); + + let intersection: Vec<_> = set1.intersection(&set2).collect(); + assert_eq!(intersection.len(), 100); + assert_eq!(intersection, (0..100).collect::>()); + } + + #[test] + fn test_union_and_intersection_with_disjoin_sets() { + let mem1 = Rc::new(RefCell::new(Vec::new())); + let mem2 = 
Rc::new(RefCell::new(Vec::new())); + let mut set1: BTreeSet = BTreeSet::new(mem1); + let mut set2: BTreeSet = BTreeSet::new(mem2); + + for i in 0..50 { + set1.insert(i); + } + for i in 50..100 { + set2.insert(i); + } + + let union: Vec<_> = set1.union(&set2).collect(); + assert_eq!(union.len(), 100); + assert_eq!(union, (0..100).collect::>()); + + let intersection: Vec<_> = set1.intersection(&set2).collect(); + assert!(intersection.is_empty()); + } + + #[test] + fn test_union_with_large_sets() { + let mem1 = Rc::new(RefCell::new(Vec::new())); + let mem2 = Rc::new(RefCell::new(Vec::new())); + let mut set1: BTreeSet = BTreeSet::new(mem1); + let mut set2: BTreeSet = BTreeSet::new(mem2); + + for i in 0..1000 { + set1.insert(i); + } + for i in 500..1500 { + set2.insert(i); + } + + let union: Vec<_> = set1.union(&set2).collect(); + assert_eq!(union.len(), 1500); + assert_eq!(union[0], 0); + assert_eq!(union[1499], 1499); + } + + #[test] + fn test_intersection_with_large_sets() { + let mem1 = Rc::new(RefCell::new(Vec::new())); + let mem2 = Rc::new(RefCell::new(Vec::new())); + let mut set1: BTreeSet = BTreeSet::new(mem1); + let mut set2: BTreeSet = BTreeSet::new(mem2); + + for i in 0..1000 { + set1.insert(i); + } + for i in 500..1500 { + set2.insert(i); + } + + let intersection: Vec<_> = set1.intersection(&set2).collect(); + assert_eq!(intersection.len(), 500); + assert_eq!(intersection[0], 500); + assert_eq!(intersection[499], 999); + } + + #[test] + fn init_preserves_data_set() { + run_btree_test(|mut btree| { + assert!(btree.insert(b(&[1, 2, 3]))); + assert!(btree.contains(&b(&[1, 2, 3]))); + + // Reload the btree + let btree = BTreeSet::init(btree.into_memory()); + + // Data still exists. 
+ assert!(btree.contains(&b(&[1, 2, 3]))); + }); + } + + #[test] + fn test_insert_and_contains() { + let mem = make_memory(); + let mut btreeset = BTreeSet::new(mem); + + assert!(!btreeset.contains(&1u32)); + btreeset.insert(1u32); + assert!(btreeset.contains(&1u32)); + } + + #[test] + fn test_remove() { + let mem = make_memory(); + let mut btreeset = BTreeSet::new(mem); + + btreeset.insert(1u32); + assert!(btreeset.contains(&1u32)); + btreeset.remove(&1u32); + assert!(!btreeset.contains(&1u32)); + } + + #[test] + fn test_iter_upper_bound() { + let mem = make_memory(); + let mut btreeset = BTreeSet::new(mem); + + for i in 0u32..100 { + btreeset.insert(i); + + // Test that `iter_upper_bound` returns the largest element strictly below the bound. + for j in 1u32..=i { + assert_eq!( + btreeset.iter_upper_bound(&(j + 1)).next(), + Some(j), + "failed to get an upper bound for {}", + j + 1 + ); + } + assert_eq!( + btreeset.iter_upper_bound(&0).next(), + None, + "0 must not have an upper bound" + ); + } + } + + #[test] + fn test_iter() { + let mem = make_memory(); + let mut btreeset = BTreeSet::new(mem); + + btreeset.insert(1u32); + btreeset.insert(2u32); + btreeset.insert(3u32); + + let elements: Vec<_> = btreeset.iter().collect(); + assert_eq!(elements, vec![1u32, 2u32, 3u32]); + } + + #[test] + fn test_range() { + let mem = make_memory(); + let mut btreeset = BTreeSet::new(mem); + + for i in 1u32..=10 { + btreeset.insert(i); + } + + let range: Vec<_> = btreeset.range(4u32..8u32).collect(); + assert_eq!(range, vec![4u32, 5u32, 6u32, 7u32]); + } + + #[test] + fn test_first_and_last() { + let mem = make_memory(); + let mut btreeset = BTreeSet::new(mem); + + btreeset.insert(3u32); + btreeset.insert(1u32); + btreeset.insert(2u32); + + assert_eq!(btreeset.first(), Some(1u32)); + assert_eq!(btreeset.last(), Some(3u32)); + } + + #[test] + fn test_len_and_is_empty() { + let mem = make_memory(); + let mut btreeset = BTreeSet::new(mem); + + assert!(btreeset.is_empty()); + 
assert_eq!(btreeset.len(), 0); + + btreeset.insert(1u32); + assert!(!btreeset.is_empty()); + assert_eq!(btreeset.len(), 1); + } + + #[test] + fn test_pop_first_and_last() { + let mem = make_memory(); + let mut btreeset = BTreeSet::new(mem); + + btreeset.insert(3u32); + btreeset.insert(1u32); + btreeset.insert(2u32); + + assert_eq!(btreeset.pop_first(), Some(1u32)); + assert_eq!(btreeset.pop_last(), Some(3u32)); + assert_eq!(btreeset.len(), 1); + assert_eq!(btreeset.first(), Some(2u32)); + assert_eq!(btreeset.last(), Some(2u32)); + } + + #[test] + fn test_clear() { + let mem = make_memory(); + let mut btreeset: BTreeSet = BTreeSet::new(mem); + + btreeset.insert(1); + btreeset.insert(2); + btreeset.insert(3); + + assert_eq!(btreeset.len(), 3); + btreeset.clear(); + assert!(btreeset.is_empty()); + assert_eq!(btreeset.len(), 0); + assert_eq!(btreeset.iter().next(), None); + } + + #[test] + fn test_iterate_large_set() { + let mem = make_memory(); + let mut btreeset: BTreeSet = BTreeSet::new(mem); + + for i in 0..1000 { + btreeset.insert(i); + } + + let elements: Vec<_> = btreeset.iter().collect(); + assert_eq!(elements.len(), 1000); + assert_eq!(elements[0], 0); + assert_eq!(elements[999], 999); + } + + #[test] + fn test_iter_upper_bound_large_set() { + let mem = make_memory(); + let mut btreeset: BTreeSet = BTreeSet::new(mem); + + for i in 0u32..1000 { + btreeset.insert(i); + } + + assert_eq!(btreeset.iter_upper_bound(&500).next(), Some(499)); + assert_eq!(btreeset.iter_upper_bound(&0).next(), None); + assert_eq!(btreeset.iter_upper_bound(&1000).next(), Some(999)); + } + + #[test] + fn test_range_large_set() { + let mem = make_memory(); + let mut btreeset: BTreeSet = BTreeSet::new(mem); + + for i in 0u32..1000 { + btreeset.insert(i); + } + + let range: Vec<_> = btreeset.range(100..200).collect(); + assert_eq!(range.len(), 100); + assert_eq!(range[0], 100); + assert_eq!(range[99], 199); + } + + #[test] + fn test_empty_set() { + let mem = make_memory(); + let btreeset: 
BTreeSet = BTreeSet::new(mem); + + assert!(btreeset.is_empty()); + assert_eq!(btreeset.len(), 0); + assert_eq!(btreeset.first(), None); + assert_eq!(btreeset.last(), None); + assert_eq!(btreeset.iter().next(), None); + } + + #[test] + fn test_insert_duplicate() { + let mem = make_memory(); + let mut btreeset: BTreeSet = BTreeSet::new(mem); + + assert!(btreeset.insert(42)); + assert!(!btreeset.insert(42)); // Duplicate insert + assert_eq!(btreeset.len(), 1); + assert!(btreeset.contains(&42)); + } + + #[test] + fn test_remove_nonexistent() { + let mem = make_memory(); + let mut btreeset: BTreeSet = BTreeSet::new(mem); + + assert!(!btreeset.remove(&42)); // Removing a non-existent element + assert!(btreeset.is_empty()); + } + + #[test] + fn test_pop_first_and_last_empty() { + let mem = make_memory(); + let mut btreeset: BTreeSet = BTreeSet::new(mem); + + assert_eq!(btreeset.pop_first(), None); + assert_eq!(btreeset.pop_last(), None); + } + + #[test] + fn test_iter_upper_bound_empty() { + let mem = make_memory(); + let btreeset: BTreeSet = BTreeSet::new(mem); + + assert_eq!(btreeset.iter_upper_bound(&42u32).next(), None); + } + + #[test] + fn test_range_empty() { + let mem = make_memory(); + let btreeset: BTreeSet = BTreeSet::new(mem); + + let range: Vec<_> = btreeset.range(10..20).collect(); + assert!(range.is_empty()); + } + + #[test] + fn test_insert_and_remove_large_set() { + let mem = make_memory(); + let mut btreeset: BTreeSet = BTreeSet::new(mem); + + for i in 0..1_000 { + assert!(btreeset.insert(i)); + } + assert_eq!(btreeset.len(), 1_000); + + for i in 0..1_000 { + assert!(btreeset.remove(&i)); + } + assert!(btreeset.is_empty()); + } + + #[test] + fn test_remove_nonexistent_large_set() { + let mem = make_memory(); + let mut btreeset: BTreeSet = BTreeSet::new(mem); + + for i in 0..1_000 { + assert!(btreeset.insert(i)); + } + + for i in 1_000..2_000 { + assert!(!btreeset.remove(&i)); // Non-existent elements + } + assert_eq!(btreeset.len(), 1_000); + } + + 
#[test] + fn test_iterate_empty_set() { + let mem = make_memory(); + let btreeset: BTreeSet = BTreeSet::new(mem); + + let elements: Vec<_> = btreeset.iter().collect(); + assert!(elements.is_empty()); + } + + #[test] + fn test_range_with_no_matches() { + let mem = make_memory(); + let mut btreeset: BTreeSet = BTreeSet::new(mem); + + for i in 0..10 { + btreeset.insert(i); + } + + let range: Vec<_> = btreeset.range(20..30).collect(); + assert!(range.is_empty()); + } + + #[test] + fn test_clear_and_reuse() { + let mem = make_memory(); + let mut btreeset: BTreeSet = BTreeSet::new(mem); + + for i in 0..100 { + btreeset.insert(i); + } + assert_eq!(btreeset.len(), 100); + + btreeset.clear(); + assert!(btreeset.is_empty()); + + for i in 100..200 { + btreeset.insert(i); + } + assert_eq!(btreeset.len(), 100); + assert!(btreeset.contains(&150)); + } + + #[test] + fn test_pop_first_and_last_large_set() { + let mem = make_memory(); + let mut btreeset: BTreeSet = BTreeSet::new(mem); + + for i in 0..1_000 { + btreeset.insert(i); + } + + for i in 0..500 { + assert_eq!(btreeset.pop_first(), Some(i)); + } + + for i in (500..1_000).rev() { + assert_eq!(btreeset.pop_last(), Some(i)); + } + + assert!(btreeset.is_empty()); + } + + #[test] + fn test_iter_upper_bound_edge_cases() { + let mem = make_memory(); + let mut btreeset: BTreeSet = BTreeSet::new(mem); + + for i in 1..=10 { + btreeset.insert(i); + } + + assert_eq!(btreeset.iter_upper_bound(&1).next(), None); // No element strictly below 1 + assert_eq!(btreeset.iter_upper_bound(&5).next(), Some(4)); // Largest element below 5 + assert_eq!(btreeset.iter_upper_bound(&11).next(), Some(10)); // Largest element below 11 + } +} diff --git a/src/btreeset/proptests.rs b/src/btreeset/proptests.rs new file mode 100644 index 00000000..8e8dd91f --- /dev/null +++ b/src/btreeset/proptests.rs @@ -0,0 +1,233 @@ +use crate::{btreeset::BTreeSet, Memory}; +use proptest::collection::vec as pvec; +use proptest::prelude::*; +use std::collections::BTreeSet 
as StdBTreeSet; +use test_strategy::proptest; + +#[derive(Debug, Clone)] +enum Operation { + Insert(Vec), + Remove(Vec), + Contains(Vec), + Iter { from: usize, len: usize }, + Range { from: usize, len: usize }, + PopFirst, + PopLast, +} + +// A custom strategy that gives unequal weights to the different operations. +// Note that `Insert` has a higher weight than `Remove` so that, on average, BTreeSets +// are growing in size the more operations are executed. +fn operation_strategy() -> impl Strategy { + prop_oneof![ + 50 => any::>().prop_map(Operation::Insert), + 20 => any::>().prop_map(Operation::Remove), + 20 => any::>().prop_map(Operation::Contains), + 5 => (any::(), any::()) + .prop_map(|(from, len)| Operation::Iter { from, len }), + 5 => (any::(), any::()) + .prop_map(|(from, len)| Operation::Range { from, len }), + 2 => Just(Operation::PopFirst), + 2 => Just(Operation::PopLast), + ] +} + +// Runs a comprehensive test for the major stable BTreeSet operations. +// Results are validated against a standard BTreeSet. +#[proptest(cases = 10)] +fn comprehensive(#[strategy(pvec(operation_strategy(), 100..5_000))] ops: Vec) { + let mem = crate::btreeset::test::make_memory(); + let mut btreeset = BTreeSet::new(mem); + let mut std_btreeset = StdBTreeSet::new(); + + // Execute all the operations, validating that the stable btreeset behaves similarly to a std + // btreeset. 
+ for op in ops.into_iter() { + execute_operation(&mut std_btreeset, &mut btreeset, op); + } +} + +#[proptest] +fn set_min_max(#[strategy(pvec(any::(), 10..100))] keys: Vec) { + crate::btreeset::test::run_btree_test(|mut set| { + prop_assert_eq!(set.first(), None); + prop_assert_eq!(set.last(), None); + + for (n, key) in keys.iter().enumerate() { + set.insert(*key); + + let min = keys[0..=n].iter().min().unwrap(); + let max = keys[0..=n].iter().max().unwrap(); + + prop_assert_eq!(set.first(), Some(*min)); + prop_assert_eq!(set.last(), Some(*max)); + } + + Ok(()) + }); +} + +#[proptest] +fn set_upper_bound_iter(#[strategy(pvec(0u64..u64::MAX - 1, 10..100))] keys: Vec) { + crate::btreeset::test::run_btree_test(|mut set| { + for k in keys.iter() { + set.insert(*k); + + prop_assert_eq!(Some(*k), set.iter_upper_bound(&(k + 1)).next()); + } + + Ok(()) + }); +} + +// Given an operation, executes it on the given stable btreeset and standard btreeset, verifying +// that the result of the operation is equal in both btrees. 
+fn execute_operation<M: crate::Memory>(
+    std_btreeset: &mut StdBTreeSet<Vec<u8>>,
+    btreeset: &mut BTreeSet<Vec<u8>, M>,
+    op: Operation,
+) {
+    match op {
+        Operation::Insert(key) => {
+            let std_res = std_btreeset.insert(key.clone());
+
+            eprintln!("Insert({})", hex::encode(&key));
+            let res = btreeset.insert(key);
+            assert_eq!(std_res, res);
+        }
+        Operation::Remove(key) => {
+            let std_res = std_btreeset.remove(&key);
+
+            eprintln!("Remove({})", hex::encode(&key));
+            let res = btreeset.remove(&key);
+            assert_eq!(std_res, res);
+        }
+        Operation::Contains(key) => {
+            let std_res = std_btreeset.contains(&key);
+
+            eprintln!("Contains({})", hex::encode(&key));
+            let res = btreeset.contains(&key);
+            assert_eq!(std_res, res);
+        }
+        Operation::Iter { from, len } => {
+            assert_eq!(std_btreeset.len(), btreeset.len() as usize);
+            if std_btreeset.is_empty() {
+                return;
+            }
+
+            // Reduce the arbitrary inputs to a window that fits the current set size.
+            let from = from % std_btreeset.len();
+            let len = len % std_btreeset.len();
+
+            eprintln!("Iterate({}, {})", from, len);
+            let std_iter = std_btreeset.iter().skip(from).take(len);
+            let mut stable_iter = btreeset.iter().skip(from).take(len);
+            for k1 in std_iter {
+                let k2 = stable_iter.next().unwrap();
+                assert_eq!(k1, &k2);
+            }
+            // The stable iterator must not yield more elements than the std one.
+            assert!(stable_iter.next().is_none());
+        }
+        Operation::Range { from, len } => {
+            assert_eq!(std_btreeset.len(), btreeset.len() as usize);
+            if std_btreeset.is_empty() {
+                return;
+            }
+
+            eprintln!("Range({}, {})", from, len);
+            let from = from % std_btreeset.len();
+            // `len` is an arbitrary `usize`, so a plain `from + len` could overflow; saturate
+            // before clamping to the last valid index.
+            let end = std::cmp::min(std_btreeset.len() - 1, from.saturating_add(len));
+
+            // Create a range for the stable btreeset from the keys at indexes `from` and `end`.
+            let range_start = btreeset.iter().nth(from).unwrap();
+            let range_end = btreeset.iter().nth(end).unwrap();
+            let stable_range: Vec<_> = btreeset.range(range_start..range_end).collect();
+
+            // Create a range for the std btreeset from the keys at indexes `from` and `end`.
+            let range_start = std_btreeset.iter().nth(from).unwrap().clone();
+            let range_end = std_btreeset.iter().nth(end).unwrap().clone();
+            let std_range: Vec<_> = std_btreeset
+                .range(range_start..range_end)
+                .cloned()
+                .collect();
+
+            // Comparing the collected ranges (instead of zipping the iterators) also detects
+            // one side yielding more or fewer elements than the other.
+            assert_eq!(std_range, stable_range);
+        }
+        Operation::PopFirst => {
+            eprintln!("PopFirst");
+            assert_eq!(std_btreeset.pop_first(), btreeset.pop_first());
+        }
+        Operation::PopLast => {
+            eprintln!("PopLast");
+            assert_eq!(std_btreeset.pop_last(), btreeset.pop_last());
+        }
+    }
+}
+
+// The union of two stable sets must yield the same elements, in the same order, as the union
+// of two std sets built from the same keys.
+// NOTE(review): the element type is written as `u64` here; the generic arguments were missing
+// in the reviewed copy of this patch — confirm against the original.
+#[proptest]
+fn test_union(
+    #[strategy(pvec(any::<u64>(), 1..100))] keys1: Vec<u64>,
+    #[strategy(pvec(any::<u64>(), 1..100))] keys2: Vec<u64>,
+) {
+    // NOTE(review): the value returned by `run_btree_test` is discarded by the trailing `;`.
+    // Confirm the helper itself surfaces an `Err` produced by `prop_assert_eq!`.
+    crate::btreeset::test::run_btree_test(|mut set1| {
+        let mut set2 = BTreeSet::new(crate::btreeset::test::make_memory());
+        let mut std_set1 = StdBTreeSet::new();
+        let mut std_set2 = StdBTreeSet::new();
+
+        for key in &keys1 {
+            set1.insert(*key);
+            std_set1.insert(*key);
+        }
+
+        for key in &keys2 {
+            set2.insert(*key);
+            std_set2.insert(*key);
+        }
+
+        let union: Vec<_> = set1.union(&set2).collect();
+        let std_union: Vec<_> = std_set1.union(&std_set2).cloned().collect();
+
+        prop_assert_eq!(union, std_union);
+
+        Ok(())
+    });
+}
+
+// The intersection of two stable sets must yield the same elements, in the same order, as the
+// intersection of two std sets built from the same keys.
+// NOTE(review): the element type is written as `u64` here; the generic arguments were missing
+// in the reviewed copy of this patch — confirm against the original.
+#[proptest]
+fn test_intersection(
+    #[strategy(pvec(any::<u64>(), 1..100))] keys1: Vec<u64>,
+    #[strategy(pvec(any::<u64>(), 1..100))] keys2: Vec<u64>,
+) {
+    crate::btreeset::test::run_btree_test(|mut set1| {
+        let mut set2 = BTreeSet::new(crate::btreeset::test::make_memory());
+        let mut std_set1 = StdBTreeSet::new();
+        let mut std_set2 = StdBTreeSet::new();
+
+        for key in &keys1 {
+            set1.insert(*key);
+            std_set1.insert(*key);
+        }
+
+        for key in &keys2 {
+            set2.insert(*key);
+            std_set2.insert(*key);
+        }
+
+        let intersection: Vec<_> = set1.intersection(&set2).collect();
+        let std_intersection: Vec<_> = std_set1.intersection(&std_set2).cloned().collect();
+
+        prop_assert_eq!(intersection, std_intersection);
+
+        Ok(())
+    });
+}
diff --git a/src/lib.rs
b/src/lib.rs index e1d0e1d2..e528dd4b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -8,6 +8,7 @@ pub mod file_mem; mod ic0_memory; // Memory API for canisters. pub mod log; pub use log::{Log as StableLog, Log}; +pub mod btreeset; pub mod memory_manager; pub mod min_heap; pub mod reader; @@ -21,6 +22,7 @@ pub use vec::{Vec as StableVec, Vec}; pub mod vec_mem; pub mod writer; pub use btreemap::{BTreeMap, BTreeMap as StableBTreeMap}; +pub use btreeset::{BTreeSet, BTreeSet as StableBTreeSet}; pub use file_mem::FileMemory; #[cfg(target_arch = "wasm32")] pub use ic0_memory::Ic0StableMemory;