From c7372e19b7a5bd339bd824a2d58603c87ef91050 Mon Sep 17 00:00:00 2001 From: Josh Jones Date: Mon, 31 Jul 2023 16:29:52 -0500 Subject: [PATCH 1/8] [WIP] replace write cache with `CidHashMap` --- src/db/car/plain.rs | 56 ++++++++++++++++++++++++++++++++--------- src/ipld/cid_hashmap.rs | 16 ++++++++++++ src/ipld/mod.rs | 1 + 3 files changed, 61 insertions(+), 12 deletions(-) diff --git a/src/db/car/plain.rs b/src/db/car/plain.rs index 59a6a25cfa21..da1b4cae5580 100644 --- a/src/db/car/plain.rs +++ b/src/db/car/plain.rs @@ -61,7 +61,7 @@ //! - A wrapper that abstracts over car formats for reading. use crate::blocks::{Tipset, TipsetKeys}; -use ahash::HashMapExt as _; +use crate::ipld::{CidHashMap, CidHashMapEntry}; use cid::Cid; use fvm_ipld_blockstore::Blockstore; use fvm_ipld_car::CarHeader; @@ -137,7 +137,7 @@ impl PlainCar { reader: buf_reader.into_inner(), index, roots, - write_cache: ahash::HashMap::new(), + write_cache: CidHashMap::new(), }), }) } @@ -185,7 +185,7 @@ impl TryFrom<&'static [u8]> for PlainCar> { struct PlainCarInner { reader: ReaderT, - write_cache: ahash::HashMap>, + write_cache: CidHashMap>, index: ahash::HashMap, roots: Vec, } @@ -211,22 +211,38 @@ where .. } = &mut *self.inner.lock(); match (index.get(k), write_cache.entry(*k)) { - (Some(_location), Occupied(cached)) => { + (Some(_location), CidHashMapEntry::V1DagCborBlake2b(Occupied(cached))) => { trace!("evicting from write cache"); Ok(Some(cached.remove())) } - (Some(UncompressedBlockDataLocation { offset, length }), Vacant(_)) => { + (Some(_location), CidHashMapEntry::Fallback(Occupied(cached))) => { + trace!("evicting from write cache"); + Ok(Some(cached.remove())) + } + ( + Some(UncompressedBlockDataLocation { offset, length }), + CidHashMapEntry::V1DagCborBlake2b(Vacant(_)), + ) + | ( + Some(UncompressedBlockDataLocation { offset, length }), + CidHashMapEntry::Fallback(Vacant(_)), + ) => { trace!("fetching from disk"); reader.seek(SeekFrom::Start(*offset))?; let mut data = vec![0; usize::try_from(*length).unwrap()]; reader.read_exact(&mut data)?; Ok(Some(data)) } - (None, Occupied(cached)) => { + (None, CidHashMapEntry::V1DagCborBlake2b(Occupied(cached))) => { + trace!("getting from write cache"); + Ok(Some(cached.get().clone())) + } + (None, CidHashMapEntry::Fallback(Occupied(cached))) => { trace!("getting from write cache"); Ok(Some(cached.get().clone())) } - (None, Vacant(_)) => { + (None, CidHashMapEntry::V1DagCborBlake2b(Vacant(_))) + | (None, CidHashMapEntry::Fallback(Vacant(_))) => { trace!("not found"); Ok(None) } @@ -263,29 +279,45 @@ pub struct CompressedBlockDataLocation { /// # Panics /// - If the write cache already contains different data with this CID fn handle_write_cache( - write_cache: &mut ahash::HashMap>, + write_cache: &mut CidHashMap>, index: &mut ahash::HashMap, k: &Cid, block: &[u8], ) -> anyhow::Result<()> { match (index.get(k), write_cache.entry(*k)) { - (None, Occupied(already)) => match already.get() == block { + (None, CidHashMapEntry::V1DagCborBlake2b(Occupied(already))) => { + match already.get() == block { + true => { + trace!("already in cache"); + Ok(()) + } + false => panic!("mismatched content on second write for CID {k}"), + } + } + (None, CidHashMapEntry::Fallback(Occupied(already))) => match already.get() == block { true => { trace!("already in cache"); Ok(()) } false => panic!("mismatched content on second write for CID {k}"), }, - (None, Vacant(vacant)) => { + (None, CidHashMapEntry::V1DagCborBlake2b(Vacant(vacant))) => { + trace!(bytes = block.len(), "insert into cache"); + vacant.insert(block.to_owned()); + Ok(()) + } + (None, CidHashMapEntry::Fallback(Vacant(vacant))) => { trace!(bytes = block.len(), "insert into cache"); vacant.insert(block.to_owned()); Ok(()) } - (Some(_), Vacant(_)) => { + (Some(_), CidHashMapEntry::V1DagCborBlake2b(Vacant(_))) + | (Some(_), CidHashMapEntry::Fallback(Vacant(_))) => { trace!("already on disk"); Ok(()) } - (Some(_), Occupied(_)) => { + (Some(_), CidHashMapEntry::V1DagCborBlake2b(Occupied(_))) + | (Some(_), CidHashMapEntry::Fallback(Occupied(_))) => { unreachable!("we don't insert a CID in the write cache if it exists on disk") } } diff --git a/src/ipld/cid_hashmap.rs b/src/ipld/cid_hashmap.rs index a3d8e83a84f2..2b2cc17ecffc 100644 --- a/src/ipld/cid_hashmap.rs +++ b/src/ipld/cid_hashmap.rs @@ -4,6 +4,7 @@ use crate::utils::cid::{CidVariant, BLAKE2B256_SIZE}; use ahash::{HashMap, HashMapExt}; use cid::Cid; +use std::collections::hash_map::Entry; // The size of a CID is 96 bytes. A CID contains: // - a version @@ -22,6 +23,11 @@ pub struct CidHashMap { fallback_hash_map: HashMap, } +pub enum CidHashMapEntry<'a, V: 'a> { + V1DagCborBlake2b(Entry<'a, [u8; BLAKE2B256_SIZE], V>), + Fallback(Entry<'a, Cid, V>), +} + impl CidHashMap { /// Creates an empty `HashMap` with CID type keys. pub fn new() -> Self { @@ -79,6 +85,16 @@ impl CidHashMap { pub fn len(&self) -> usize { self.v1_dagcbor_blake2b_hash_map.len() + self.fallback_hash_map.len() } + + /// Gets the given key's corresponding entry in the map for in-place manipulation. + pub fn entry(&mut self, k: Cid) -> CidHashMapEntry<'_, V> { + match k.try_into() { + Ok(CidVariant::V1DagCborBlake2b(bytes)) => { + CidHashMapEntry::V1DagCborBlake2b(self.v1_dagcbor_blake2b_hash_map.entry(bytes)) + } + Err(()) => CidHashMapEntry::Fallback(self.fallback_hash_map.entry(k)), + } + } } #[cfg(test)] diff --git a/src/ipld/mod.rs b/src/ipld/mod.rs index 49f5e4b901e3..e132c04ab3a5 100644 --- a/src/ipld/mod.rs +++ b/src/ipld/mod.rs @@ -12,6 +12,7 @@ pub use libipld_core::ipld::Ipld; pub use util::*; pub use self::cid_hashmap::CidHashMap; +pub use self::cid_hashmap::CidHashMapEntry; pub use self::cid_hashset::CidHashSet; pub use libipld_core::serde::{from_ipld, to_ipld}; From 1c9747b74adcaefc17ded1fd9f3e9eaab845863c Mon Sep 17 00:00:00 2001 From: Josh Jones Date: Tue, 1 Aug 2023 10:53:08 -0500 Subject: [PATCH 2/8] updates to `entry` fn Co-authored-by: Aatif Syed --- src/db/car/plain.rs | 50 +++++---------------------- src/ipld/cid_hashmap.rs | 75 ++++++++++++++++++++++++++++++++++++----- 2 files changed, 75 insertions(+), 50 deletions(-) diff --git a/src/db/car/plain.rs b/src/db/car/plain.rs index da1b4cae5580..bde48a337519 100644 --- a/src/db/car/plain.rs +++ b/src/db/car/plain.rs @@ -69,7 +69,6 @@ use integer_encoding::VarIntReader; use parking_lot::Mutex; use std::{ any::Any, - collections::hash_map::Entry::{Occupied, Vacant}, io::{ self, BufReader, ErrorKind::{InvalidData, UnexpectedEof, Unsupported}, @@ -79,6 +78,7 @@ use std::{ }; use tokio::io::{AsyncWrite, AsyncWriteExt}; use tracing::{debug, trace}; +use CidHashMapEntry::{Occupied, Vacant}; /// **Note that all operations on this store are blocking**. /// @@ -211,38 +211,22 @@ where .. } = &mut *self.inner.lock(); match (index.get(k), write_cache.entry(*k)) { - (Some(_location), CidHashMapEntry::V1DagCborBlake2b(Occupied(cached))) => { + (Some(_location), Occupied(cached)) => { trace!("evicting from write cache"); Ok(Some(cached.remove())) } - (Some(_location), CidHashMapEntry::Fallback(Occupied(cached))) => { - trace!("evicting from write cache"); - Ok(Some(cached.remove())) - } - ( - Some(UncompressedBlockDataLocation { offset, length }), - CidHashMapEntry::V1DagCborBlake2b(Vacant(_)), - ) - | ( - Some(UncompressedBlockDataLocation { offset, length }), - CidHashMapEntry::Fallback(Vacant(_)), - ) => { + (Some(UncompressedBlockDataLocation { offset, length }), Vacant(_)) => { trace!("fetching from disk"); reader.seek(SeekFrom::Start(*offset))?; let mut data = vec![0; usize::try_from(*length).unwrap()]; reader.read_exact(&mut data)?; Ok(Some(data)) } - (None, CidHashMapEntry::V1DagCborBlake2b(Occupied(cached))) => { - trace!("getting from write cache"); - Ok(Some(cached.get().clone())) - } - (None, CidHashMapEntry::Fallback(Occupied(cached))) => { + (None, Occupied(cached)) => { trace!("getting from write cache"); Ok(Some(cached.get().clone())) } - (None, CidHashMapEntry::V1DagCborBlake2b(Vacant(_))) - | (None, CidHashMapEntry::Fallback(Vacant(_))) => { + (None, Vacant(_)) => { trace!("not found"); Ok(None) } @@ -285,39 +269,23 @@ fn handle_write_cache( block: &[u8], ) -> anyhow::Result<()> { match (index.get(k), write_cache.entry(*k)) { - (None, CidHashMapEntry::V1DagCborBlake2b(Occupied(already))) => { - match already.get() == block { - true => { - trace!("already in cache"); - Ok(()) - } - false => panic!("mismatched content on second write for CID {k}"), - } - } - (None, CidHashMapEntry::Fallback(Occupied(already))) => match already.get() == block { + (None, Occupied(already)) => match already.get() == block { true => { trace!("already in cache"); Ok(()) } false => panic!("mismatched content on second write for CID {k}"), }, - (None, CidHashMapEntry::V1DagCborBlake2b(Vacant(vacant))) => { - trace!(bytes = block.len(), "insert into cache"); - vacant.insert(block.to_owned()); - Ok(()) - } - (None, CidHashMapEntry::Fallback(Vacant(vacant))) => { + (None, Vacant(vacant)) => { trace!(bytes = block.len(), "insert into cache"); vacant.insert(block.to_owned()); Ok(()) } - (Some(_), CidHashMapEntry::V1DagCborBlake2b(Vacant(_))) - | (Some(_), CidHashMapEntry::Fallback(Vacant(_))) => { + (Some(_), Vacant(_)) => { trace!("already on disk"); Ok(()) } - (Some(_), CidHashMapEntry::V1DagCborBlake2b(Occupied(_))) - | (Some(_), CidHashMapEntry::Fallback(Occupied(_))) => { + (Some(_), Occupied(_)) => { unreachable!("we don't insert a CID in the write cache if it exists on disk") } } diff --git a/src/ipld/cid_hashmap.rs b/src/ipld/cid_hashmap.rs index 2b2cc17ecffc..9fec42adcf95 100644 --- a/src/ipld/cid_hashmap.rs +++ b/src/ipld/cid_hashmap.rs @@ -4,7 +4,7 @@ use crate::utils::cid::{CidVariant, BLAKE2B256_SIZE}; use ahash::{HashMap, HashMapExt}; use cid::Cid; -use std::collections::hash_map::Entry; +use std::collections::hash_map::{Entry, OccupiedEntry, VacantEntry}; // The size of a CID is 96 bytes. A CID contains: // - a version @@ -23,9 +23,52 @@ pub struct CidHashMap { fallback_hash_map: HashMap, } -pub enum CidHashMapEntry<'a, V: 'a> { - V1DagCborBlake2b(Entry<'a, [u8; BLAKE2B256_SIZE], V>), - Fallback(Entry<'a, Cid, V>), +pub enum CidHashMapEntry<'a, V> { + Occupied(Occupied<'a, V>), + Vacant(Vacant<'a, V>), +} + +pub struct Occupied<'a, V> { + inner: OccupiedInner<'a, V>, +} + +enum OccupiedInner<'a, V> { + V1(OccupiedEntry<'a, [u8; 32], V>), + Fallback(OccupiedEntry<'a, Cid, V>), +} + +impl Occupied<'_, V> { + pub fn get(&self) -> &V { + let ret = match &self.inner { + OccupiedInner::V1(o) => o.get(), + OccupiedInner::Fallback(o) => o.get(), + }; + ret + } + pub fn remove(self) -> V { + match self.inner { + OccupiedInner::V1(o) => o.remove(), + OccupiedInner::Fallback(o) => o.remove(), + } + } +} + +pub struct Vacant<'a, V> { + inner: VacantInner<'a, V>, +} + +enum VacantInner<'a, V> { + V1(VacantEntry<'a, [u8; 32], V>), + Fallback(VacantEntry<'a, Cid, V>), +} + +impl<'a, V> Vacant<'a, V> { + pub fn insert(self, value: V) -> &'a mut V { + match self.inner { + VacantInner::V1(v) => v.insert(value), + VacantInner::Fallback(v) => v.insert(value), + } + } } impl CidHashMap { @@ -87,12 +130,26 @@ impl CidHashMap { } /// Gets the given key's corresponding entry in the map for in-place manipulation. - pub fn entry(&mut self, k: Cid) -> CidHashMapEntry<'_, V> { - match k.try_into() { - Ok(CidVariant::V1DagCborBlake2b(bytes)) => { - CidHashMapEntry::V1DagCborBlake2b(self.v1_dagcbor_blake2b_hash_map.entry(bytes)) + pub fn entry(&mut self, key: Cid) -> CidHashMapEntry<'_, V> { + match CidVariant::try_from(key) { + Ok(CidVariant::V1DagCborBlake2b(v1)) => { + match self.v1_dagcbor_blake2b_hash_map.entry(v1) { + Entry::Occupied(occupied) => CidHashMapEntry::Occupied(Occupied { + inner: OccupiedInner::V1(occupied), + }), + Entry::Vacant(vacant) => CidHashMapEntry::Vacant(Vacant { + inner: VacantInner::V1(vacant), + }), + } } - Err(()) => CidHashMapEntry::Fallback(self.fallback_hash_map.entry(k)), + Err(_must_use_fallback) => match self.fallback_hash_map.entry(key) { + Entry::Occupied(occupied) => CidHashMapEntry::Occupied(Occupied { + inner: OccupiedInner::Fallback(occupied), + }), + Entry::Vacant(vacant) => CidHashMapEntry::Vacant(Vacant { + inner: VacantInner::Fallback(vacant), + }), + }, } } } From 4e4bfa17d88069743ffc2f898b0c98c8753561d9 Mon Sep 17 00:00:00 2001 From: Josh Jones Date: Tue, 1 Aug 2023 13:08:34 -0500 Subject: [PATCH 3/8] use `CidHashMap` for index --- src/db/car/plain.rs | 12 +++++------ src/ipld/cid_hashmap.rs | 47 ++++++++++++++++++++++++++++++++++++----- 2 files changed, 48 insertions(+), 11 deletions(-) diff --git a/src/db/car/plain.rs b/src/db/car/plain.rs index bde48a337519..7023a8b55b87 100644 --- a/src/db/car/plain.rs +++ b/src/db/car/plain.rs @@ -122,7 +122,7 @@ impl PlainCar { // now create the index let index = iter::from_fn(|| read_block_data_location_and_skip(&mut buf_reader).transpose()) - .collect::, _>>()?; + .collect::, _>>()?; match index.len() { 0 => Err(io::Error::new( @@ -155,7 +155,7 @@ impl PlainCar { /// In an arbitrary order #[cfg(test)] pub fn cids(&self) -> Vec { - self.inner.lock().index.keys().cloned().collect() + self.inner.lock().index.keys().collect() } pub fn into_dyn(self) -> PlainCar> { @@ -186,7 +186,7 @@ impl TryFrom<&'static [u8]> for PlainCar> { struct PlainCarInner { reader: ReaderT, write_cache: CidHashMap>, - index: ahash::HashMap, + index: CidHashMap, roots: Vec, } @@ -210,7 +210,7 @@ where index, .. } = &mut *self.inner.lock(); - match (index.get(k), write_cache.entry(*k)) { + match (index.get(*k), write_cache.entry(*k)) { (Some(_location), Occupied(cached)) => { trace!("evicting from write cache"); Ok(Some(cached.remove())) @@ -264,11 +264,11 @@ pub struct CompressedBlockDataLocation { /// - If the write cache already contains different data with this CID fn handle_write_cache( write_cache: &mut CidHashMap>, - index: &mut ahash::HashMap, + index: &mut CidHashMap, k: &Cid, block: &[u8], ) -> anyhow::Result<()> { - match (index.get(k), write_cache.entry(*k)) { + match (index.get(*k), write_cache.entry(*k)) { (None, Occupied(already)) => match already.get() == block { true => { trace!("already in cache"); diff --git a/src/ipld/cid_hashmap.rs b/src/ipld/cid_hashmap.rs index 9fec42adcf95..506fe7210806 100644 --- a/src/ipld/cid_hashmap.rs +++ b/src/ipld/cid_hashmap.rs @@ -3,8 +3,10 @@ use crate::utils::cid::{CidVariant, BLAKE2B256_SIZE}; use ahash::{HashMap, HashMapExt}; +use cid::multihash::{self}; use cid::Cid; -use std::collections::hash_map::{Entry, OccupiedEntry, VacantEntry}; +use fvm_ipld_encoding::DAG_CBOR; +use std::collections::hash_map::{Entry, Keys, OccupiedEntry, VacantEntry}; // The size of a CID is 96 bytes. A CID contains: // - a version @@ -23,6 +25,26 @@ pub struct CidHashMap { fallback_hash_map: HashMap, } +pub struct CidHashMapKeys<'a, V> { + v1_dagcbor_blake2b_keys: Keys<'a, [u8; BLAKE2B256_SIZE], V>, + fallback_keys: Keys<'a, Cid, V>, +} + +impl Iterator for CidHashMapKeys<'_, V> { + type Item = Cid; + + fn next(&mut self) -> Option { + match self.v1_dagcbor_blake2b_keys.next() { + Some(bytes) => Some(Cid::new_v1( + DAG_CBOR, + multihash::Multihash::wrap(multihash::Code::Blake2b256.into(), bytes) + .expect("failed to convert digest to CID"), + )), + None => self.fallback_keys.next().copied(), + } + } +} + pub enum CidHashMapEntry<'a, V> { Occupied(Occupied<'a, V>), Vacant(Vacant<'a, V>), @@ -152,15 +174,30 @@ impl CidHashMap { }, } } + + #[cfg(test)] + pub fn keys(&self) -> CidHashMapKeys<'_, V> { + CidHashMapKeys { + v1_dagcbor_blake2b_keys: self.v1_dagcbor_blake2b_hash_map.keys(), + fallback_keys: self.fallback_hash_map.keys(), + } + } +} + +impl FromIterator<(Cid, V)> for CidHashMap { + fn from_iter>(iter: T) -> Self { + let mut map = Self::new(); + for (k, v) in iter { + map.insert(k, v); + } + map + } } #[cfg(test)] mod tests { use super::*; - use cid::{ - multihash::{self, MultihashDigest}, - Cid, - }; + use cid::multihash::MultihashDigest; use fvm_ipld_encoding::DAG_CBOR; use quickcheck_macros::quickcheck; From e2be924eca0a0df275a575ec482efaa0acb29e7e Mon Sep 17 00:00:00 2001 From: Josh Jones Date: Tue, 15 Aug 2023 14:11:20 +0100 Subject: [PATCH 4/8] updates from merge conflicts --- src/db/car/plain.rs | 2 +- src/ipld/cid_hashmap.rs | 28 ++++++++++------------------ 2 files changed, 11 insertions(+), 19 deletions(-) diff --git a/src/db/car/plain.rs b/src/db/car/plain.rs index a78e7faea774..d0a0dc1f06d0 100644 --- a/src/db/car/plain.rs +++ b/src/db/car/plain.rs @@ -60,11 +60,11 @@ //! - CARv2 support //! - A wrapper that abstracts over car formats for reading. +use crate::ipld::{CidHashMap, CidHashMapEntry}; use crate::{ blocks::{Tipset, TipsetKeys}, utils::encoding::from_slice_with_fallback, }; -use crate::ipld::{CidHashMap, CidHashMapEntry}; use cid::Cid; use fvm_ipld_blockstore::Blockstore; diff --git a/src/ipld/cid_hashmap.rs b/src/ipld/cid_hashmap.rs index 23fe5031c501..507c8d44ffe1 100644 --- a/src/ipld/cid_hashmap.rs +++ b/src/ipld/cid_hashmap.rs @@ -67,12 +67,6 @@ impl Occupied<'_, V> { }; ret } - pub fn remove(self) -> V { - match self.inner { - OccupiedInner::V1(o) => o.remove(), - OccupiedInner::Fallback(o) => o.remove(), - } - } } pub struct Vacant<'a, V> { @@ -151,18 +145,16 @@ impl CidHashMap { /// Gets the given key's corresponding entry in the map for in-place manipulation. pub fn entry(&mut self, key: Cid) -> CidHashMapEntry<'_, V> { - match CidVariant::try_from(key) { - Ok(CidVariant::V1DagCborBlake2b(v1)) => { - match self.v1_dagcbor_blake2b_hash_map.entry(v1) { - Entry::Occupied(occupied) => CidHashMapEntry::Occupied(Occupied { - inner: OccupiedInner::V1(occupied), - }), - Entry::Vacant(vacant) => CidHashMapEntry::Vacant(Vacant { - inner: VacantInner::V1(vacant), - }), - } - } - Err(_must_use_fallback) => match self.fallback_hash_map.entry(key) { + match CidVariant::from(key) { + CidVariant::V1DagCborBlake2b(v1) => match self.v1_dagcbor_blake2b_hash_map.entry(v1) { + Entry::Occupied(occupied) => CidHashMapEntry::Occupied(Occupied { + inner: OccupiedInner::V1(occupied), + }), + Entry::Vacant(vacant) => CidHashMapEntry::Vacant(Vacant { + inner: VacantInner::V1(vacant), + }), + }, + CidVariant::Generic(generic) => match self.fallback_hash_map.entry(*generic) { Entry::Occupied(occupied) => CidHashMapEntry::Occupied(Occupied { inner: OccupiedInner::Fallback(occupied), }), From 7d80d8759b32280f72c2514df47b99555d92e7d7 Mon Sep 17 00:00:00 2001 From: Josh Jones Date: Tue, 15 Aug 2023 15:20:08 +0100 Subject: [PATCH 5/8] fix issues from merge conflict resolution --- src/ipld/cid_hashmap.rs | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/src/ipld/cid_hashmap.rs b/src/ipld/cid_hashmap.rs index 932d90729ab9..6999ca21c34b 100644 --- a/src/ipld/cid_hashmap.rs +++ b/src/ipld/cid_hashmap.rs @@ -221,16 +221,6 @@ impl CidHashMap { } } -impl FromIterator<(Cid, V)> for CidHashMap { - fn from_iter>(iter: T) -> Self { - let mut map = Self::new(); - for (k, v) in iter { - map.insert(k, v); - } - map - } -} - #[cfg(test)] mod tests { use super::*; From 6b90aea855d463f2dd37df9f2a4edf6ae8910f49 Mon Sep 17 00:00:00 2001 From: Josh Jones Date: Fri, 18 Aug 2023 12:35:17 +0100 Subject: [PATCH 6/8] fix: use `SmallCid` in `CidHashMap` entry --- src/ipld/cid_hashmap.rs | 74 +++++++++-------------------------------- 1 file changed, 16 insertions(+), 58 deletions(-) diff --git a/src/ipld/cid_hashmap.rs b/src/ipld/cid_hashmap.rs index 972e4ac44a15..31377e3e8189 100644 --- a/src/ipld/cid_hashmap.rs +++ b/src/ipld/cid_hashmap.rs @@ -3,10 +3,8 @@ use crate::utils::cid::SmallCid; use ahash::{HashMap, HashMapExt}; -use cid::multihash::{self}; use cid::Cid; -use fvm_ipld_encoding::DAG_CBOR; -use std::collections::hash_map::{Entry, Keys, OccupiedEntry, VacantEntry}; +use std::collections::hash_map::{Keys, OccupiedEntry, VacantEntry}; // The size of a CID is 96 bytes. A CID contains: // - a version @@ -57,22 +55,14 @@ impl Iterator for IntoIter { } pub struct CidHashMapKeys<'a, V> { - v1_dagcbor_blake2b_keys: Keys<'a, [u8; BLAKE2B256_SIZE], V>, - fallback_keys: Keys<'a, Cid, V>, + keys: Keys<'a, SmallCid, V>, } impl Iterator for CidHashMapKeys<'_, V> { type Item = Cid; fn next(&mut self) -> Option { - match self.v1_dagcbor_blake2b_keys.next() { - Some(bytes) => Some(Cid::new_v1( - DAG_CBOR, - multihash::Multihash::wrap(multihash::Code::Blake2b256.into(), bytes) - .expect("failed to convert digest to CID"), - )), - None => self.fallback_keys.next().copied(), - } + self.keys.next().map(|small_cid| small_cid.cid()) } } @@ -81,40 +71,19 @@ pub enum CidHashMapEntry<'a, V> { Vacant(Vacant<'a, V>), } -pub struct Occupied<'a, V> { - inner: OccupiedInner<'a, V>, -} - -enum OccupiedInner<'a, V> { - V1(OccupiedEntry<'a, [u8; 32], V>), - Fallback(OccupiedEntry<'a, Cid, V>), -} +pub struct Occupied<'a, V>(OccupiedEntry<'a, SmallCid, V>); impl Occupied<'_, V> { pub fn get(&self) -> &V { - let ret = match &self.inner { - OccupiedInner::V1(o) => o.get(), - OccupiedInner::Fallback(o) => o.get(), - }; - ret + self.0.get() } } -pub struct Vacant<'a, V> { - inner: VacantInner<'a, V>, -} - -enum VacantInner<'a, V> { - V1(VacantEntry<'a, [u8; 32], V>), - Fallback(VacantEntry<'a, Cid, V>), -} +pub struct Vacant<'a, V>(VacantEntry<'a, SmallCid, V>); impl<'a, V> Vacant<'a, V> { pub fn insert(self, value: V) -> &'a mut V { - match self.inner { - VacantInner::V1(v) => v.insert(value), - VacantInner::Fallback(v) => v.insert(value), - } + self.0.insert(value) } } @@ -157,31 +126,20 @@ impl CidHashMap { /// Gets the given key's corresponding entry in the map for in-place manipulation. pub fn entry(&mut self, key: Cid) -> CidHashMapEntry<'_, V> { - match CidVariant::from(key) { - CidVariant::V1DagCborBlake2b(v1) => match self.v1_dagcbor_blake2b_hash_map.entry(v1) { - Entry::Occupied(occupied) => CidHashMapEntry::Occupied(Occupied { - inner: OccupiedInner::V1(occupied), - }), - Entry::Vacant(vacant) => CidHashMapEntry::Vacant(Vacant { - inner: VacantInner::V1(vacant), - }), - }, - CidVariant::Generic(generic) => match self.fallback_hash_map.entry(*generic) { - Entry::Occupied(occupied) => CidHashMapEntry::Occupied(Occupied { - inner: OccupiedInner::Fallback(occupied), - }), - Entry::Vacant(vacant) => CidHashMapEntry::Vacant(Vacant { - inner: VacantInner::Fallback(vacant), - }), - }, + match self.0.entry(SmallCid::from(key)) { + std::collections::hash_map::Entry::Occupied(occupied) => { + CidHashMapEntry::Occupied(Occupied(occupied)) + } + std::collections::hash_map::Entry::Vacant(vacant) => { + CidHashMapEntry::Vacant(Vacant(vacant)) + } } } #[cfg(test)] pub fn keys(&self) -> CidHashMapKeys<'_, V> { CidHashMapKeys { - v1_dagcbor_blake2b_keys: self.v1_dagcbor_blake2b_hash_map.keys(), - fallback_keys: self.fallback_hash_map.keys(), + keys: self.0.keys(), } } } @@ -189,7 +147,7 @@ impl CidHashMap { #[cfg(test)] mod tests { use super::*; - use cid::multihash::MultihashDigest; + use cid::multihash::{self, MultihashDigest}; use fvm_ipld_encoding::DAG_CBOR; use quickcheck_macros::quickcheck; From 3192498107ca5f183a272a3e89de4370fe29ee0b Mon Sep 17 00:00:00 2001 From: Josh Jones Date: Mon, 21 Aug 2023 11:29:53 +0100 Subject: [PATCH 7/8] add unit tests --- src/ipld/cid_hashmap.rs | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/src/ipld/cid_hashmap.rs b/src/ipld/cid_hashmap.rs index 31377e3e8189..e08834fdca0e 100644 --- a/src/ipld/cid_hashmap.rs +++ b/src/ipld/cid_hashmap.rs @@ -225,6 +225,37 @@ mod tests { assert_eq!(cid_hash_map.len(), hash_map.len()); } + #[quickcheck] + fn check_entry(cid_vector: Vec<(Cid, u64)>, cid: Cid, insert: bool) { + let (mut cid_hash_map, mut hash_map) = generate_hash_maps(cid_vector); + // Insert key half of the time to ensure equal probability of entry being occupied or vacant; occasionally the key will already be present when quickcheck generates the maps, so we also remove the key with 50% probability. + if insert { + cid_hash_map.insert(cid, 0); + hash_map.insert(cid, 0); + } else { + cid_hash_map.remove(cid); + hash_map.remove(&cid); + } + match cid_hash_map.entry(cid) { + CidHashMapEntry::Occupied(occupied) => { + assert_eq!(occupied.get(), hash_map.get(&cid).unwrap()); + } + CidHashMapEntry::Vacant(_) => { + assert_eq!(cid_hash_map.get(cid), hash_map.get(&cid)); + } + } + } + + #[quickcheck] + fn keys(cid_vector: Vec<(Cid, u64)>) { + let (cid_hash_map, hash_map) = generate_hash_maps(cid_vector); + // Hash maps are not required to be ordered, but it is important for vectors, so sort the vectors of keys before comparing. + assert_eq!( + cid_hash_map.keys().collect::>().sort(), + hash_map.keys().cloned().collect::>().sort() + ); + } + #[quickcheck] fn cidhashmap_to_hashmap_to_cidhashmap(cid_vector: Vec<(Cid, u64)>) { let (cid_hash_map, _) = generate_hash_maps(cid_vector); From 369a53db0e4b571dc52e52d90b9b9644f61b56d8 Mon Sep 17 00:00:00 2001 From: Josh Jones Date: Mon, 21 Aug 2023 11:48:06 +0100 Subject: [PATCH 8/8] fix: move `sort` outside of `assert` statement --- src/ipld/cid_hashmap.rs | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/ipld/cid_hashmap.rs b/src/ipld/cid_hashmap.rs index e08834fdca0e..e46f83255082 100644 --- a/src/ipld/cid_hashmap.rs +++ b/src/ipld/cid_hashmap.rs @@ -250,10 +250,11 @@ mod tests { fn keys(cid_vector: Vec<(Cid, u64)>) { let (cid_hash_map, hash_map) = generate_hash_maps(cid_vector); // Hash maps are not required to be ordered, but it is important for vectors, so sort the vectors of keys before comparing. - assert_eq!( - cid_hash_map.keys().collect::>().sort(), - hash_map.keys().cloned().collect::>().sort() - ); + let mut cid_hash_map = cid_hash_map.keys().collect::>(); + cid_hash_map.sort(); + let mut hash_map = hash_map.keys().cloned().collect::>(); + hash_map.sort(); + assert_eq!(cid_hash_map, hash_map); } #[quickcheck]