From a525fe2d4256fc928efbc25eb717b34a084e70d0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Neumann?= Date: Fri, 9 Dec 2016 14:43:57 +0100 Subject: [PATCH 1/3] Add zeros iterator --- benches/benches.rs | 110 ++++++++++++++++++++++++++ src/lib.rs | 189 +++++++++++++++++++++++++++++++++++++++------ 2 files changed, 276 insertions(+), 23 deletions(-) diff --git a/benches/benches.rs b/benches/benches.rs index 729a661..b896f3c 100644 --- a/benches/benches.rs +++ b/benches/benches.rs @@ -31,6 +31,33 @@ fn iter_ones_using_slice_directly(fb: &FixedBitSet, f: &mut F) } } +#[inline] +fn iter_zeros_using_contains(fb: &FixedBitSet, f: &mut F) { + for bit in 0 .. fb.len() { + if !fb.contains(bit) { + f(bit); + } + } +} + +#[inline] +fn iter_zeros_using_slice_directly(fb: &FixedBitSet, f: &mut F) { + for (block_idx, &block) in fb.as_slice().iter().enumerate() { + let mut bit_pos = block_idx * size_of::() * 8; + let mut block: u32 = block; + let mut n = block.count_zeros(); + + while n != 0 { + if (block & 1) == 0 { + f(bit_pos); + n -= 1; + } + block = block >> 1; + bit_pos += 1; + } + } +} + #[bench] fn bench_iter_ones_using_contains_all_zeros(b: &mut Bencher) { const N: usize = 1_000_000; @@ -110,3 +137,86 @@ fn bench_iter_ones_all_ones(b: &mut Bencher) { }); } + + +// Zeros + + +#[bench] +fn bench_iter_zeros_using_contains_all_zeros(b: &mut Bencher) { + const N: usize = 1_000_000; + let fb = FixedBitSet::with_capacity(N); + + b.iter(|| { + let mut count = 0; + iter_zeros_using_contains(&fb, &mut |_bit| count += 1); + test::black_box(|| { count }); + }); +} + +#[bench] +fn bench_iter_zeros_using_contains_all_ones(b: &mut Bencher) { + const N: usize = 1_000_000; + let mut fb = FixedBitSet::with_capacity(N); + for i in 0..N { fb.insert(i); } + + b.iter(|| { + let mut count = 0; + iter_zeros_using_contains(&fb, &mut |_bit| count += 1); + test::black_box(|| { count }); + }); +} + +#[bench] +fn bench_iter_zeros_using_slice_directly_all_zero(b: &mut Bencher) { + const N: 
usize = 1_000_000; + let fb = FixedBitSet::with_capacity(N); + + b.iter(|| { + let mut count = 0; + iter_zeros_using_slice_directly(&fb, &mut |_bit| count += 1); + test::black_box(|| { count }); + }); +} + +#[bench] +fn bench_iter_zeros_using_slice_directly_all_ones(b: &mut Bencher) { + const N: usize = 1_000_000; + let mut fb = FixedBitSet::with_capacity(N); + for i in 0..N { fb.insert(i); } + + b.iter(|| { + let mut count = 0; + iter_zeros_using_slice_directly(&fb, &mut |_bit| count += 1); + test::black_box(|| { count }); + }); +} + +#[bench] +fn bench_iter_zeros_all_zeros(b: &mut Bencher) { + const N: usize = 1_000_000; + let fb = FixedBitSet::with_capacity(N); + + b.iter(|| { + let mut count = 0; + for _ in fb.zeros() { + count += 1; + } + test::black_box(|| { count }); + }); +} + +#[bench] +fn bench_iter_zeros_all_ones(b: &mut Bencher) { + const N: usize = 1_000_000; + let mut fb = FixedBitSet::with_capacity(N); + for i in 0..N { fb.insert(i); } + + b.iter(|| { + let mut count = 0; + for _ in fb.zeros() { + count += 1; + } + test::black_box(|| { count }); + }); +} diff --git a/src/lib.rs b/src/lib.rs index 0474cf6..77b318b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -4,6 +4,7 @@ mod range; use std::ops::Index; +use std::mem::size_of; pub use range::IndexRange; static TRUE: bool = true; @@ -194,66 +195,165 @@ impl FixedBitSet /// Iterator element is the index of the `1` bit, type `usize`. 
#[inline] pub fn ones(&self) -> Ones { + let mut iter = BitIter { + current_bit_idx: 0, + current_block_idx: 0, + current_block: 0, + remaining_blocks: &[] + }; match self.as_slice().split_first() { Some((&block, rem)) => { - Ones { - current_bit_idx: 0, - current_block_idx: 0, - current_block: block, - remaining_blocks: rem + iter.current_block = block; + iter.remaining_blocks = rem; + Ones(iter) + } + None => { + Ones(iter) + } + } + } + + #[inline] + pub fn zeros(&self) -> Zeros { + let mut iter = BitIter { + current_bit_idx: 0, + current_block_idx: 0, + current_block: 0, + remaining_blocks: &[] + }; + match self.as_slice().split_first() { + Some((&block, rem)) => { + iter.current_block = block; + iter.remaining_blocks = rem; + // get position of the last valid bit in the last block + let p = (self.len() % (size_of::() * 8)) as u32; + Zeros { + i: iter, + bits_to_visit: block.count_zeros(), + last_bit_index: p, } } None => { - Ones { - current_bit_idx: 0, - current_block_idx: 0, - current_block: 0, - remaining_blocks: &[] + Zeros { + i: iter, + bits_to_visit: 0, + last_bit_index: 0 } } } } } -pub struct Ones<'a> { +/// Generic iterator over bits +pub struct BitIter<'a> { current_bit_idx: usize, current_block_idx: usize, remaining_blocks: &'a [Block], current_block: Block } +impl<'a> BitIter<'a> { + /// Move `BitIter` to the next block. + #[inline] + pub fn next_block(&mut self) -> Option<(usize, Block)> { + match self.remaining_blocks.split_first() { + Some((&next_block, rest)) => { + self.remaining_blocks = rest; + self.current_block_idx += 1; + let idx = self.current_block_idx * BITS; + let block = next_block; + Some((idx, block)) + } + None => { + // last block => done + None + } + } + } +} + +/// Specialized iterator over set bits. +pub struct Ones<'a>(BitIter<'a>); +/// Specialized iterator over unset bits. 
+pub struct Zeros<'a> { + i: BitIter<'a>, + // number of unset bits left to visit + bits_to_visit: u32, + // remaining padding of the last block + last_bit_index: u32 +} + impl<'a> Iterator for Ones<'a> { type Item = usize; // the bit position of the '1' + #[inline] fn next(&mut self) -> Option<Self::Item> { - let mut block = self.current_block; - let mut idx = self.current_bit_idx; + let mut block = self.0.current_block; + let mut idx = self.0.current_bit_idx; loop { loop { if (block & 1) == 1 { - self.current_block = block >> 1; - self.current_bit_idx = idx + 1; + self.0.current_block = block >> 1; + self.0.current_bit_idx = idx + 1; return Some(idx); } // reordering the two lines below makes a huge (2x) difference in performance! - block = block >> 1; + block >>= 1; idx += 1; if block == 0 { break; } } + // go to next block + match self.0.next_block() { + Some((idx_, block_)) => { + idx = idx_; + block = block_; + } + None => { + return None; + } + } + } + } +} + +impl<'a> Iterator for Zeros<'a> { + type Item = usize; // the bit position of the '0' + #[inline] + fn next(&mut self) -> Option<Self::Item> { + let mut block = self.i.current_block; + let mut idx = self.i.current_bit_idx; + + loop { + // loop until all 0 bits are visited + while self.bits_to_visit != 0 { + if (block & 1) == 0 { + self.bits_to_visit -= 1; + self.i.current_block = block >> 1; + self.i.current_bit_idx = idx + 1; + return Some(idx); + } + // reordering the two lines below makes a huge (2x) difference in performance! + block >>= 1; + idx += 1; + } // go to next block - match self.remaining_blocks.split_first() { - Some((&next_block, rest)) => { - self.remaining_blocks = rest; - self.current_block_idx += 1; - idx = self.current_block_idx * BITS; - block = next_block; + match self.i.next_block() { + Some((idx_, block_)) => { + idx = idx_; + block = block_; + if self.i.remaining_blocks.is_empty() { + // set all bits which indices are greater than length of + // the bitset.
This ensures that iterator will stop at + // the right bit + block |= !((2 as Block).pow(self.last_bit_index) - 1); + } + self.bits_to_visit = block.count_zeros(); } None => { - // last block => done return None; } } @@ -421,6 +521,49 @@ fn iter_ones_range() { } } +#[test] +fn zeros() { + let mut fb = FixedBitSet::with_capacity(100); + for i in 0..100 { + fb.insert(i); + } + fb.set(11, false); + fb.set(12, false); + fb.set(7, false); + fb.set(35, false); + fb.set(40, false); + fb.set(77, false); + fb.set(95, false); + fb.set(50, false); + fb.set(99, false); + + let ones: Vec<_> = fb.zeros().collect(); + + assert_eq!(vec![7, 11, 12, 35, 40, 50, 77, 95, 99], ones); +} + +#[test] +fn iter_zeros_range() { + fn test_range(from: usize, to: usize, capa: usize) { + assert!(to <= capa); + let mut fb = FixedBitSet::with_capacity(capa); + for i in 0..capa { + fb.insert(i); + } + for i in from..to { + fb.set(i, false); + } + let ones: Vec<_> = fb.zeros().collect(); + let expected: Vec<_> = (from..to).collect(); + assert_eq!(expected, ones); + } + + for i in 0..100 { + test_range(i, 100, 100); + test_range(0, i, 100); + } +} + #[should_panic] #[test] fn count_ones_oob() { From 46760b2dd974eb350c6f67a078e31f6966ba08cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Neumann?= Date: Sat, 17 Dec 2016 16:27:11 +0100 Subject: [PATCH 2/3] Fixed zeros iter for one-block bitsets --- src/lib.rs | 52 +++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 49 insertions(+), 3 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 77b318b..a88703e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -223,13 +223,19 @@ impl FixedBitSet }; match self.as_slice().split_first() { Some((&block, rem)) => { - iter.current_block = block; - iter.remaining_blocks = rem; // get position of the last valid bit in the last block let p = (self.len() % (size_of::() * 8)) as u32; + iter.current_block = block; + // if bitset consists of only one block mask it accordingly to + // the length + if 
rem.is_empty() { + iter.current_block |= !((2 as Block).pow(p) - 1); + } + iter.remaining_blocks = rem; + let bits = iter.current_block.count_zeros(); Zeros { i: iter, - bits_to_visit: block.count_zeros(), + bits_to_visit: bits, last_bit_index: p, } } @@ -542,6 +548,46 @@ fn zeros() { assert_eq!(vec![7, 11, 12, 35, 40, 50, 77, 95, 99], ones); } +#[test] +fn zeros_one_block() { + let mut fb = FixedBitSet::with_capacity(2); + fb.set(0, true); + fb.set(1, true); + + let next = fb.zeros().next(); + assert_eq!(next, None); +} + +#[test] +fn zeros_two_blocks() { + let mut fb = FixedBitSet::with_capacity(33); + for i in 0..33 { + fb.set(i, true); + } + let next = fb.zeros().next(); + assert_eq!(next, None); +} + +#[test] +fn ones_one_block() { + let mut fb = FixedBitSet::with_capacity(2); + fb.set(0, false); + fb.set(1, false); + + let next = fb.ones().next(); + assert_eq!(next, None); +} + +#[test] +fn ones_two_blocks() { + let mut fb = FixedBitSet::with_capacity(33); + for i in 0..33 { + fb.set(i, false); + } + let next = fb.ones().next(); + assert_eq!(next, None); +} + #[test] fn iter_zeros_range() { fn test_range(from: usize, to: usize, capa: usize) { From 973927ae6838205f652819a091f8a2ee1575dcd9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Neumann?= Date: Thu, 29 Dec 2016 16:14:29 +0100 Subject: [PATCH 3/3] inline count_ones() --- src/lib.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/lib.rs b/src/lib.rs index a88703e..888cc8c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -150,6 +150,7 @@ impl FixedBitSet /// Use `..` to count the whole content of the bitset. /// /// **Panics** if the range extends past the end of the bitset. 
+ #[inline] pub fn count_ones<T: IndexRange>(&self, range: T) -> usize { let start = range.start().unwrap_or(0); @@ -158,6 +159,7 @@ impl FixedBitSet self.count_ones_impl(start, end) } + #[inline] fn count_ones_impl(&self, start: usize, end: usize) -> usize { let (first_block, first_rem) = div_rem(start, BITS); let (last_block, last_rem) = div_rem(end, BITS);