From f8c7b4bafe27cf65e89cea91e0902cd197f94bbf Mon Sep 17 00:00:00 2001 From: Hrvoje Niksic Date: Thu, 15 Jan 2026 17:18:02 +0100 Subject: [PATCH 1/5] Optimize with_capacity_and_blocks to avoid double initialization Use Vec::with_capacity() and pointer writes instead of allocating zeroed memory and overwriting it. This enables efficient construction of bitsets from iterators like repeat(!0) without the overhead of first zeroing the allocation. Mask off unused bits in the last block to ensure consistent behavior with PartialEq and Hash. --- src/lib.rs | 57 ++++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 53 insertions(+), 4 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index fb4f764..37e0348 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -131,11 +131,60 @@ impl FixedBitSet { /// assert_eq!(format!("{:b}", bs), "0010"); /// ``` pub fn with_capacity_and_blocks>(bits: usize, blocks: I) -> Self { - let mut bitset = Self::with_capacity(bits); - for (subblock, value) in bitset.as_mut_slice().iter_mut().zip(blocks.into_iter()) { - *subblock = value; + if bits == 0 { + return Self::new(); + } + + let (mut simd_block_cnt, rem) = div_rem(bits, SimdBlock::BITS); + simd_block_cnt += (rem > 0) as usize; + + let (mut block_cnt, rem) = div_rem(bits, BITS); + block_cnt += (rem > 0) as usize; + + // SAFETY: We use Vec::with_capacity() to obtain uninitialized memory, and + // initialize all of it before passing ownership to the returned FixedBitSet. + unsafe { + let mut vec = Vec::::with_capacity(simd_block_cnt); + let mut subblock = vec.as_mut_ptr().cast::(); + let subblock_end = subblock.add(block_cnt); + + // Copy as much as we can from blocks + for value in blocks { + subblock.write(value); + subblock = subblock.add(1); + if subblock == subblock_end { + break; + } + } + + // Zero out the remainder of the allocation that the iterator didn't reach. 
+ let simd_block_end = vec.as_mut_ptr().add(simd_block_cnt).cast::(); + core::ptr::write_bytes( + subblock, + 0, + simd_block_end.offset_from(subblock) as usize, + ); + + // Mask off any bits in the last block that the iterator did write to, but + // that are beyond the length. This is necessary so that PartialEq, Hash, + // etc. work correctly. + let rem = bits % BITS; + if rem != 0 { + let last_block_ptr = vec.as_mut_ptr().cast::().add(block_cnt - 1); + let mask = (1usize << rem) - 1; + last_block_ptr.write(last_block_ptr.read() & mask); + } + + let data = NonNull::new_unchecked(vec.as_mut_ptr()).cast(); + let capacity = vec.capacity(); + // FixedBitSet is taking over the ownership of vec's data + core::mem::forget(vec); + FixedBitSet { + data, + capacity, + length: bits, + } } - bitset } /// Grow capacity to **bits**, all new bits initialized to zero From 226463aaff9ada1a30ff9de3e196103de02edaa7 Mon Sep 17 00:00:00 2001 From: Hrvoje Niksic Date: Thu, 15 Jan 2026 17:21:08 +0100 Subject: [PATCH 2/5] Add FixedBitSet::ones_with_capacity() to create all-ones bitset efficiently --- src/lib.rs | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/lib.rs b/src/lib.rs index 37e0348..b0a2ee0 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -187,6 +187,19 @@ impl FixedBitSet { } } + /// Create a new **FixedBitSet** with a specific number of bits, + /// all initially set. 
+ /// + /// For example: + /// ``` + /// let bs = fixedbitset::FixedBitSet::ones_with_capacity(10); + /// assert_eq!(bs.count_ones(..), 10); + /// assert_eq!(bs.len(), 10); + /// ``` + pub fn ones_with_capacity(bits: usize) -> Self { + Self::with_capacity_and_blocks(bits, core::iter::repeat(!0)) + } + /// Grow capacity to **bits**, all new bits initialized to zero #[inline] pub fn grow(&mut self, bits: usize) { From 7976369b41807d6808cdd45ccf2c7a768a988f68 Mon Sep 17 00:00:00 2001 From: Hrvoje Niksic Date: Thu, 15 Jan 2026 17:22:35 +0100 Subject: [PATCH 3/5] Add tests --- tests/tests.rs | 93 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 93 insertions(+) diff --git a/tests/tests.rs b/tests/tests.rs index a52ac30..aba5af0 100644 --- a/tests/tests.rs +++ b/tests/tests.rs @@ -112,6 +112,99 @@ fn grow_and_insert() { assert_eq!(fb.count_ones(..), 34); } +#[test] +#[cfg_attr(target_family = "wasm", wasm_bindgen_test)] +fn ones_with_capacity_empty() { + let fb = FixedBitSet::ones_with_capacity(0); + assert_eq!(fb.len(), 0); + assert!(fb.is_empty()); + assert!(fb.is_clear()); +} + +#[test] +#[cfg_attr(target_family = "wasm", wasm_bindgen_test)] +fn ones_with_capacity_small() { + let fb = FixedBitSet::ones_with_capacity(10); + assert_eq!(fb.len(), 10); + assert_eq!(fb.count_ones(..), 10); + assert!(fb.is_full()); + for i in 0..10 { + assert!(fb.contains(i)); + } + // Bits beyond capacity should not be accessible + assert!(!fb.contains(10)); + assert!(!fb.contains(100)); +} + +#[test] +#[cfg_attr(target_family = "wasm", wasm_bindgen_test)] +fn ones_with_capacity_block_boundary() { + // Test at exact block boundary (64 bits on 64-bit systems) + let fb = FixedBitSet::ones_with_capacity(BITS); + assert_eq!(fb.len(), BITS); + assert_eq!(fb.count_ones(..), BITS); + assert!(fb.is_full()); + for i in 0..BITS { + assert!(fb.contains(i)); + } +} + +#[test] +#[cfg_attr(target_family = "wasm", wasm_bindgen_test)] +fn ones_with_capacity_multiple_blocks() { + // Test 
with multiple blocks + let size = BITS * 3 + 17; // spans multiple blocks with remainder + let fb = FixedBitSet::ones_with_capacity(size); + assert_eq!(fb.len(), size); + assert_eq!(fb.count_ones(..), size); + assert!(fb.is_full()); + for i in 0..size { + assert!(fb.contains(i)); + } + assert!(!fb.contains(size)); +} + +#[test] +#[cfg_attr(target_family = "wasm", wasm_bindgen_test)] +fn ones_with_capacity_large() { + let size = if cfg!(miri) { 1000 } else { 10000 }; + let fb = FixedBitSet::ones_with_capacity(size); + assert_eq!(fb.len(), size); + assert_eq!(fb.count_ones(..), size); + assert!(fb.is_full()); + + // Spot check some bits + assert!(fb.contains(0)); + assert!(fb.contains(size / 2)); + assert!(fb.contains(size - 1)); + assert!(!fb.contains(size)); +} + +#[test] +#[cfg_attr(target_family = "wasm", wasm_bindgen_test)] +fn ones_with_capacity_ones_iter() { + let fb = FixedBitSet::ones_with_capacity(100); + let ones: Vec<_> = fb.ones().collect(); + let expected: Vec<_> = (0..100).collect(); + assert_eq!(ones, expected); + + let ones_rev: Vec<_> = fb.ones().rev().collect(); + let expected_rev: Vec<_> = (0..100).rev().collect(); + assert_eq!(ones_rev, expected_rev); +} + +#[test] +#[cfg_attr(target_family = "wasm", wasm_bindgen_test)] +fn ones_with_capacity_equals_insert_range() { + // Ensure ones_with_capacity produces the same result as with_capacity + insert_range + for size in [0, 1, 10, BITS - 1, BITS, BITS + 1, BITS * 2, BITS * 3 + 17] { + let a = FixedBitSet::ones_with_capacity(size); + let mut b = FixedBitSet::with_capacity(size); + b.insert_range(..); + assert_eq!(a, b, "mismatch for size {}", size); + } +} + #[test] #[cfg_attr(target_family = "wasm", wasm_bindgen_test)] fn test_toggle() { From f1ba50e9cc643e886b31a23866a44c8f7fd8d6f5 Mon Sep 17 00:00:00 2001 From: Hrvoje Niksic Date: Thu, 15 Jan 2026 17:22:49 +0100 Subject: [PATCH 4/5] Fix compiler warnings --- src/lib.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git 
a/src/lib.rs b/src/lib.rs index b0a2ee0..1f56963 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1003,7 +1003,7 @@ impl FixedBitSet { pub fn union_count(&self, other: &FixedBitSet) -> usize { let me = self.as_slice(); let other = other.as_slice(); - let count = Self::batch_count_ones(me.iter().zip(other.iter()).map(|(x, y)| (*x | *y))); + let count = Self::batch_count_ones(me.iter().zip(other.iter()).map(|(x, y)| *x | *y)); match other.len().cmp(&me.len()) { Ordering::Greater => count + Self::batch_count_ones(other[me.len()..].iter().copied()), Ordering::Less => count + Self::batch_count_ones(me[other.len()..].iter().copied()), @@ -1022,7 +1022,7 @@ impl FixedBitSet { self.as_slice() .iter() .zip(other.as_slice()) - .map(|(x, y)| (*x & *y)), + .map(|(x, y)| *x & *y), ) } @@ -1037,7 +1037,7 @@ impl FixedBitSet { self.as_slice() .iter() .zip(other.as_slice().iter()) - .map(|(x, y)| (*x & !*y)), + .map(|(x, y)| *x & !*y), ) + Self::batch_count_ones(self.as_slice().iter().skip(other.as_slice().len()).copied()) } @@ -1050,7 +1050,7 @@ impl FixedBitSet { pub fn symmetric_difference_count(&self, other: &FixedBitSet) -> usize { let me = self.as_slice(); let other = other.as_slice(); - let count = Self::batch_count_ones(me.iter().zip(other.iter()).map(|(x, y)| (*x ^ *y))); + let count = Self::batch_count_ones(me.iter().zip(other.iter()).map(|(x, y)| *x ^ *y)); match other.len().cmp(&me.len()) { Ordering::Greater => count + Self::batch_count_ones(other[me.len()..].iter().copied()), Ordering::Less => count + Self::batch_count_ones(me[other.len()..].iter().copied()), From 30aac8f697b74c6f8187db9d6da0ff65092325bb Mon Sep 17 00:00:00 2001 From: Hrvoje Niksic Date: Fri, 16 Jan 2026 09:42:18 +0100 Subject: [PATCH 5/5] Move last-block masking from with_capacity_and_blocks to ones_with_capacity Avoid changing with_capacity_and_blocks behavior by having ones_with_capacity generate correctly masked block values directly. Clarify in docs that bits beyond capacity are stored as provided. 
--- src/lib.rs | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 1f56963..3b1d456 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -122,7 +122,8 @@ impl FixedBitSet { /// /// If the blocks are not the exact size needed for the capacity /// they will be padded with zeros (if shorter) or truncated to - /// the capacity (if longer). + /// the capacity (if longer). Note that bits within the last block + /// that exceed the capacity are stored as provided. /// /// For example: /// ``` @@ -165,16 +166,6 @@ impl FixedBitSet { simd_block_end.offset_from(subblock) as usize, ); - // Mask off any bits in the last block that the iterator did write to, but - // that are beyond the length. This is necessary so that PartialEq, Hash, - // etc. work correctly. - let rem = bits % BITS; - if rem != 0 { - let last_block_ptr = vec.as_mut_ptr().cast::().add(block_cnt - 1); - let mask = (1usize << rem) - 1; - last_block_ptr.write(last_block_ptr.read() & mask); - } - let data = NonNull::new_unchecked(vec.as_mut_ptr()).cast(); let capacity = vec.capacity(); // FixedBitSet is taking over the ownership of vec's data @@ -197,7 +188,19 @@ impl FixedBitSet { /// assert_eq!(bs.len(), 10); /// ``` pub fn ones_with_capacity(bits: usize) -> Self { - Self::with_capacity_and_blocks(bits, core::iter::repeat(!0)) + if bits == 0 { + return Self::new(); + } + let (mut block_cnt, rem) = div_rem(bits, BITS); + block_cnt += (rem > 0) as usize; + let last_block = if rem == 0 { !0 } else { (1usize << rem) - 1 }; + + Self::with_capacity_and_blocks( + bits, + core::iter::repeat(!0) + .take(block_cnt - 1) + .chain(core::iter::once(last_block)), + ) } /// Grow capacity to **bits**, all new bits initialized to zero