diff --git a/src/ipld/cid_hashmap.rs b/src/ipld/cid_hashmap.rs index c648a5d9bc16..93f5d2f08645 100644 --- a/src/ipld/cid_hashmap.rs +++ b/src/ipld/cid_hashmap.rs @@ -1,7 +1,7 @@ // Copyright 2019-2023 ChainSafe Systems // SPDX-License-Identifier: Apache-2.0, MIT -use crate::utils::cid::{CidVariant, BLAKE2B256_SIZE}; +use crate::utils::cid::SmallCid; use ahash::{HashMap, HashMapExt}; use cid::Cid; @@ -17,10 +17,7 @@ use cid::Cid; // length=32. Taking advantage of this knowledge, we can store the vast majority of CIDs (+99.99%) // in one third of the usual space (32 bytes vs 96 bytes). #[derive(Clone, Debug, Default, PartialEq)] -pub struct CidHashMap { - v1_dagcbor_blake2b_hash_map: HashMap<[u8; BLAKE2B256_SIZE], V>, - fallback_hash_map: HashMap, -} +pub struct CidHashMap(HashMap); impl Extend<(Cid, V)> for CidHashMap { fn extend>(&mut self, iter: T) { @@ -38,91 +35,59 @@ impl FromIterator<(Cid, V)> for CidHashMap { } } -pub struct IntoIter { - small: std::collections::hash_map::IntoIter<[u8; BLAKE2B256_SIZE], V>, - fallback: std::collections::hash_map::IntoIter, -} +pub struct IntoIter(std::collections::hash_map::IntoIter); impl IntoIterator for CidHashMap { type Item = (Cid, V); type IntoIter = IntoIter; fn into_iter(self) -> Self::IntoIter { - let CidHashMap { - v1_dagcbor_blake2b_hash_map, - fallback_hash_map, - } = self; - Self::IntoIter { - small: v1_dagcbor_blake2b_hash_map.into_iter(), - fallback: fallback_hash_map.into_iter(), - } + IntoIter(self.0.into_iter()) } } impl Iterator for IntoIter { type Item = (Cid, V); fn next(&mut self) -> Option { - self.small - .next() - .map(|(bytes, v)| (Cid::from(CidVariant::V1DagCborBlake2b(bytes)), v)) - .or_else(|| self.fallback.next()) + self.0.next().map(|(small_cid, v)| (small_cid.cid(), v)) } } impl CidHashMap { /// Creates an empty `HashMap` with CID type keys. pub fn new() -> Self { - Self { - v1_dagcbor_blake2b_hash_map: HashMap::new(), - fallback_hash_map: HashMap::new(), - } + Self(HashMap::new()) } /// Returns `true` if the map contains a value for the specified key. pub fn contains_key(&self, k: Cid) -> bool { - match k.into() { - CidVariant::V1DagCborBlake2b(bytes) => { - self.v1_dagcbor_blake2b_hash_map.contains_key(&bytes) - } - CidVariant::Generic(_) => self.fallback_hash_map.contains_key(&k), - } + self.0.contains_key(&SmallCid::from(k)) } /// Inserts a key-value pair into the map; if the map did not have this key present, [`None`] is returned. pub fn insert(&mut self, k: Cid, v: V) -> Option { - match k.into() { - CidVariant::V1DagCborBlake2b(bytes) => { - self.v1_dagcbor_blake2b_hash_map.insert(bytes, v) - } - CidVariant::Generic(_) => self.fallback_hash_map.insert(k, v), - } + self.0.insert(SmallCid::from(k), v) } /// Removes a key from the map, returning the value at the key if the key /// was previously in the map. pub fn remove(&mut self, k: Cid) -> Option { - match k.into() { - CidVariant::V1DagCborBlake2b(bytes) => self.v1_dagcbor_blake2b_hash_map.remove(&bytes), - CidVariant::Generic(_) => self.fallback_hash_map.remove(&k), - } + self.0.remove(&SmallCid::from(k)) } /// Returns the number of elements the map can hold without reallocating. pub fn capacity(&self) -> usize { - self.v1_dagcbor_blake2b_hash_map.capacity() + self.fallback_hash_map.capacity() + self.0.capacity() } /// Returns a reference to the value corresponding to the key. pub fn get(&self, k: Cid) -> Option<&V> { - match k.into() { - CidVariant::V1DagCborBlake2b(bytes) => self.v1_dagcbor_blake2b_hash_map.get(&bytes), - CidVariant::Generic(_) => self.fallback_hash_map.get(&k), - } + self.0.get(&SmallCid::from(k)) } /// Returns the number of elements in the map. pub fn len(&self) -> usize { - self.v1_dagcbor_blake2b_hash_map.len() + self.fallback_hash_map.len() + self.0.len() } } diff --git a/src/ipld/frozen_cids.rs b/src/ipld/frozen_cids.rs index d20f85fd75e1..56daed420c6c 100644 --- a/src/ipld/frozen_cids.rs +++ b/src/ipld/frozen_cids.rs @@ -1,12 +1,12 @@ // Copyright 2019-2023 ChainSafe Systems // SPDX-License-Identifier: Apache-2.0, MIT -use crate::utils::cid::CidVariant; +use crate::utils::cid::SmallCid; use cid::Cid; use serde::{Deserialize, Serialize}; -/// A wrapper around `Box<[CidVariant]>` with a [`Cid`]-friendly API: -/// - Uses [`CidVariant`] over [`Cid`] to save memory on common CIDs - see docs for that type for more +/// A wrapper around `Box<[SmallCid]>` with a [`Cid`]-friendly API: +/// - Uses [`SmallCid`] over [`Cid`] to save memory on common CIDs - see docs for that type for more /// - Uses `Box<[...]>`, over `Vec<...>` avoiding vector overallocation /// /// There will be MANY small collections of [`FrozenCids`] over the codebase, so these space savings matter @@ -16,11 +16,11 @@ pub struct FrozenCids( #[cfg_attr(test, arbitrary(gen( |g| Vec::arbitrary(g).into_boxed_slice() )))] - Box<[CidVariant]>, + Box<[SmallCid]>, ); pub struct Iter<'a> { - cids: std::slice::Iter<'a, CidVariant>, + cids: std::slice::Iter<'a, SmallCid>, } impl<'a> IntoIterator for &'a FrozenCids { @@ -50,12 +50,7 @@ impl From> for FrozenCids { fn from(cids: Vec) -> Self { let mut small_cids = Vec::with_capacity(cids.len()); for cid in cids { - match cid.into() { - CidVariant::V1DagCborBlake2b(bytes) => { - small_cids.push(CidVariant::V1DagCborBlake2b(bytes)) - } - _ => small_cids.push(CidVariant::Generic(Box::new(cid))), - } + small_cids.push(SmallCid::from(cid)); } FrozenCids(small_cids.into_boxed_slice()) } @@ -69,16 +64,7 @@ impl From for Vec { impl From<&FrozenCids> for Vec { fn from(frozen_cids: &FrozenCids) -> Self { - let mut cids = Vec::with_capacity(frozen_cids.0.len()); - for cid in frozen_cids.into_iter() { - match cid.into() { - CidVariant::V1DagCborBlake2b(bytes) => { - cids.push(Cid::from(CidVariant::V1DagCborBlake2b(bytes))) - } - _ => cids.push(cid), - } - } - cids + Vec::from_iter(frozen_cids.into_iter()) } } @@ -88,8 +74,7 @@ impl FrozenCids { } pub fn contains(&self, cid: Cid) -> bool { - let cid = CidVariant::from(cid); - self.0.contains(&cid) + self.0.contains(&SmallCid::from(cid)) } } diff --git a/src/utils/cid/mod.rs b/src/utils/cid/mod.rs index bc466749443e..098f8641987c 100644 --- a/src/utils/cid/mod.rs +++ b/src/utils/cid/mod.rs @@ -31,20 +31,37 @@ impl CidCborExt for Cid {} pub const BLAKE2B256_SIZE: usize = 32; -/// `CidVariant` is an enumeration of known CID types that are used in the Filecoin blockchain. CIDs +/// `SmallCid` encapsulates an enumeration of known CID types that are used in the Filecoin blockchain. CIDs /// contain a significant amount of static data (such as version, codec, hash identifier, hash /// length). This static data represented by a single tag in the `enum`. /// /// Nearly all Filecoin CIDs are `V1`,`DagCbor` encoded, and hashed with `Blake2b256` (which has a hash -/// length of 256 bits). Naively representing such a CID requires 96 bytes but `CidVariant` does it in -/// only 40 bytes. If other types of CID become popular, they can be added to the `CidVariant` +/// length of 256 bits). Naively representing such a CID requires 96 bytes but `SmallCid` does it in +/// only 40 bytes. If other types of CID become popular, they can be added to the `SmallCid` /// structure. /// /// The `Generic` variant is used for CIDs that do not fit into the other variants. /// These variants are used for optimizing storage of CIDs in the `FrozenCids` structure. #[cfg_attr(test, derive(derive_quickcheck_arbitrary::Arbitrary))] #[derive(Clone, Debug, PartialEq, Eq, Hash)] -pub enum CidVariant { +pub struct SmallCid(SmallCidInner); + +impl SmallCid { + pub fn cid(&self) -> Cid { + match &self.0 { + SmallCidInner::Generic(cid) => **cid, + SmallCidInner::V1DagCborBlake2b(digest) => Cid::new_v1( + DAG_CBOR, + multihash::Multihash::wrap(Blake2b256.into(), digest) + .expect("failed to convert Blake2b digest to Multihash for creation of V1 DAG-CBOR Blake2b CID"), + ), + } + } +} + +#[cfg_attr(test, derive(derive_quickcheck_arbitrary::Arbitrary))] +#[derive(Clone, Debug, PartialEq, Eq, Hash)] +enum SmallCidInner { Generic(Box), V1DagCborBlake2b( #[cfg_attr(test, arbitrary(gen(|g: &mut quickcheck::Gen| std::array::from_fn(|_ix| Arbitrary::arbitrary(g)))))] @@ -52,16 +69,16 @@ pub enum CidVariant { ), } -impl Serialize for CidVariant { +impl Serialize for SmallCid { fn serialize(&self, serializer: S) -> Result where S: Serializer, { - Cid::from(self).serialize(serializer) + self.cid().serialize(serializer) } } -impl<'de> Deserialize<'de> for CidVariant { +impl<'de> Deserialize<'de> for SmallCid { fn deserialize(deserializer: D) -> Result where D: Deserializer<'de>, @@ -70,34 +87,34 @@ impl<'de> Deserialize<'de> for CidVariant { } } -impl From for CidVariant { +impl From for SmallCid { fn from(cid: Cid) -> Self { if cid.version() == Version::V1 && cid.codec() == DAG_CBOR { if let Ok(small_hash) = cid.hash().resize() { let (code, bytes, size) = small_hash.into_inner(); if code == u64::from(Code::Blake2b256) && size as usize == BLAKE2B256_SIZE { - return CidVariant::V1DagCborBlake2b(bytes); + return SmallCid(SmallCidInner::V1DagCborBlake2b(bytes)); } } } - CidVariant::Generic(Box::new(cid)) + SmallCid(SmallCidInner::Generic(Box::new(cid))) } } -impl From for Cid { - fn from(variant: CidVariant) -> Self { +impl From for Cid { + fn from(variant: SmallCid) -> Self { Cid::from(&variant) } } -impl From<&CidVariant> for Cid { - fn from(variant: &CidVariant) -> Self { +impl From<&SmallCid> for Cid { + fn from(variant: &SmallCid) -> Self { match variant { - CidVariant::Generic(cid) => **cid, - CidVariant::V1DagCborBlake2b(digest) => Cid::new_v1( + SmallCid(SmallCidInner::Generic(cid)) => **cid, + SmallCid(SmallCidInner::V1DagCborBlake2b(digest)) => Cid::new_v1( DAG_CBOR, multihash::Multihash::wrap(Blake2b256.into(), digest) - .expect("failed to convert Blake2b digest to V1 DAG-CBOR Blake2b CID"), + .expect("failed to convert Blake2b digest to Multihash for creation of V1 DAG-CBOR Blake2b CID"), ), } } @@ -105,7 +122,7 @@ impl From<&CidVariant> for Cid { #[cfg(test)] mod tests { - use super::CidVariant; + use super::SmallCid; use super::*; use crate::db::MemoryDB; use crate::utils::db::CborStoreExt; @@ -155,18 +172,18 @@ mod tests { .unwrap(); assert!(matches!( cid.try_into().unwrap(), - CidVariant::V1DagCborBlake2b(_) + SmallCid(SmallCidInner::V1DagCborBlake2b(_)) )); } - // If this test fails, the default encoding is no longer v1+dagcbor+blake2b. Add the new default - // CID type to `CidVariant`. + // If this test fails, the default encoding is no longer v1+dagcbor+blake2b. Consider adding the new default + // CID type to `SmallCid`. #[test] fn default_is_v1_dagcbor() { let cid = MemoryDB::default().put_cbor_default(&()).unwrap(); assert!(matches!( cid.try_into().unwrap(), - CidVariant::V1DagCborBlake2b(_) + SmallCid(SmallCidInner::V1DagCborBlake2b(_)) )); } }