Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 12 additions & 47 deletions src/ipld/cid_hashmap.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
// Copyright 2019-2023 ChainSafe Systems
// SPDX-License-Identifier: Apache-2.0, MIT

use crate::utils::cid::{CidVariant, BLAKE2B256_SIZE};
use crate::utils::cid::SmallCid;
use ahash::{HashMap, HashMapExt};
use cid::Cid;

Expand All @@ -17,10 +17,7 @@ use cid::Cid;
// length=32. Taking advantage of this knowledge, we can store the vast majority of CIDs (+99.99%)
// in one third of the usual space (32 bytes vs 96 bytes).
#[derive(Clone, Debug, Default, PartialEq)]
pub struct CidHashMap<V> {
v1_dagcbor_blake2b_hash_map: HashMap<[u8; BLAKE2B256_SIZE], V>,
fallback_hash_map: HashMap<Cid, V>,
}
pub struct CidHashMap<V>(HashMap<SmallCid, V>);

impl<V> Extend<(Cid, V)> for CidHashMap<V> {
fn extend<T: IntoIterator<Item = (Cid, V)>>(&mut self, iter: T) {
Expand All @@ -38,91 +35,59 @@ impl<V> FromIterator<(Cid, V)> for CidHashMap<V> {
}
}

pub struct IntoIter<V> {
small: std::collections::hash_map::IntoIter<[u8; BLAKE2B256_SIZE], V>,
fallback: std::collections::hash_map::IntoIter<Cid, V>,
}
pub struct IntoIter<V>(std::collections::hash_map::IntoIter<SmallCid, V>);

impl<V> IntoIterator for CidHashMap<V> {
type Item = (Cid, V);
type IntoIter = IntoIter<V>;

fn into_iter(self) -> Self::IntoIter {
let CidHashMap {
v1_dagcbor_blake2b_hash_map,
fallback_hash_map,
} = self;
Self::IntoIter {
small: v1_dagcbor_blake2b_hash_map.into_iter(),
fallback: fallback_hash_map.into_iter(),
}
IntoIter(self.0.into_iter())
}
}

impl<V> Iterator for IntoIter<V> {
type Item = (Cid, V);
fn next(&mut self) -> Option<Self::Item> {
self.small
.next()
.map(|(bytes, v)| (Cid::from(CidVariant::V1DagCborBlake2b(bytes)), v))
.or_else(|| self.fallback.next())
self.0.next().map(|(small_cid, v)| (small_cid.cid(), v))
}
}

impl<V> CidHashMap<V> {
/// Creates an empty `CidHashMap`.
pub fn new() -> Self {
Self {
v1_dagcbor_blake2b_hash_map: HashMap::new(),
fallback_hash_map: HashMap::new(),
}
Self(HashMap::new())
}

/// Returns `true` if the map contains a value for the specified key.
pub fn contains_key(&self, k: Cid) -> bool {
match k.into() {
CidVariant::V1DagCborBlake2b(bytes) => {
self.v1_dagcbor_blake2b_hash_map.contains_key(&bytes)
}
CidVariant::Generic(_) => self.fallback_hash_map.contains_key(&k),
}
self.0.contains_key(&SmallCid::from(k))
}

/// Inserts a key-value pair into the map; if the map did not have this key present, [`None`] is returned.
pub fn insert(&mut self, k: Cid, v: V) -> Option<V> {
match k.into() {
CidVariant::V1DagCborBlake2b(bytes) => {
self.v1_dagcbor_blake2b_hash_map.insert(bytes, v)
}
CidVariant::Generic(_) => self.fallback_hash_map.insert(k, v),
}
self.0.insert(SmallCid::from(k), v)
}

/// Removes a key from the map, returning the value at the key if the key
/// was previously in the map.
pub fn remove(&mut self, k: Cid) -> Option<V> {
match k.into() {
CidVariant::V1DagCborBlake2b(bytes) => self.v1_dagcbor_blake2b_hash_map.remove(&bytes),
CidVariant::Generic(_) => self.fallback_hash_map.remove(&k),
}
self.0.remove(&SmallCid::from(k))
}

/// Returns the number of elements the map can hold without reallocating.
pub fn capacity(&self) -> usize {
self.v1_dagcbor_blake2b_hash_map.capacity() + self.fallback_hash_map.capacity()
self.0.capacity()
}

/// Returns a reference to the value corresponding to the key.
pub fn get(&self, k: Cid) -> Option<&V> {
match k.into() {
CidVariant::V1DagCborBlake2b(bytes) => self.v1_dagcbor_blake2b_hash_map.get(&bytes),
CidVariant::Generic(_) => self.fallback_hash_map.get(&k),
}
self.0.get(&SmallCid::from(k))
}

/// Returns the number of elements in the map.
pub fn len(&self) -> usize {
self.v1_dagcbor_blake2b_hash_map.len() + self.fallback_hash_map.len()
self.0.len()
}
}

Expand Down
31 changes: 8 additions & 23 deletions src/ipld/frozen_cids.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
// Copyright 2019-2023 ChainSafe Systems
// SPDX-License-Identifier: Apache-2.0, MIT

use crate::utils::cid::CidVariant;
use crate::utils::cid::SmallCid;
use cid::Cid;
use serde::{Deserialize, Serialize};

/// A wrapper around `Box<[CidVariant]>` with a [`Cid`]-friendly API:
/// - Uses [`CidVariant`] over [`Cid`] to save memory on common CIDs - see docs for that type for more
/// A wrapper around `Box<[SmallCid]>` with a [`Cid`]-friendly API:
/// - Uses [`SmallCid`] over [`Cid`] to save memory on common CIDs - see docs for that type for more
/// - Uses `Box<[...]>` over `Vec<...>`, avoiding vector overallocation
///
/// There will be MANY small collections of [`FrozenCids`] over the codebase, so these space savings matter
Expand All @@ -16,11 +16,11 @@ pub struct FrozenCids(
#[cfg_attr(test, arbitrary(gen(
|g| Vec::arbitrary(g).into_boxed_slice()
)))]
Box<[CidVariant]>,
Box<[SmallCid]>,
);

pub struct Iter<'a> {
cids: std::slice::Iter<'a, CidVariant>,
cids: std::slice::Iter<'a, SmallCid>,
}

impl<'a> IntoIterator for &'a FrozenCids {
Expand Down Expand Up @@ -50,12 +50,7 @@ impl From<Vec<Cid>> for FrozenCids {
fn from(cids: Vec<Cid>) -> Self {
let mut small_cids = Vec::with_capacity(cids.len());
for cid in cids {
match cid.into() {
CidVariant::V1DagCborBlake2b(bytes) => {
small_cids.push(CidVariant::V1DagCborBlake2b(bytes))
}
_ => small_cids.push(CidVariant::Generic(Box::new(cid))),
}
small_cids.push(SmallCid::from(cid));
}
FrozenCids(small_cids.into_boxed_slice())
}
Expand All @@ -69,16 +64,7 @@ impl From<FrozenCids> for Vec<Cid> {

impl From<&FrozenCids> for Vec<Cid> {
fn from(frozen_cids: &FrozenCids) -> Self {
let mut cids = Vec::with_capacity(frozen_cids.0.len());
for cid in frozen_cids.into_iter() {
match cid.into() {
CidVariant::V1DagCborBlake2b(bytes) => {
cids.push(Cid::from(CidVariant::V1DagCborBlake2b(bytes)))
}
_ => cids.push(cid),
}
}
cids
Vec::from_iter(frozen_cids.into_iter())
}
}

Expand All @@ -88,8 +74,7 @@ impl FrozenCids {
}

pub fn contains(&self, cid: Cid) -> bool {
let cid = CidVariant::from(cid);
self.0.contains(&cid)
self.0.contains(&SmallCid::from(cid))
}
}

Expand Down
61 changes: 39 additions & 22 deletions src/utils/cid/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,37 +31,54 @@ impl CidCborExt for Cid {}

pub const BLAKE2B256_SIZE: usize = 32;

/// `CidVariant` is an enumeration of known CID types that are used in the Filecoin blockchain. CIDs
/// `SmallCid` encapsulates an enumeration of known CID types that are used in the Filecoin blockchain. CIDs
/// contain a significant amount of static data (such as version, codec, hash identifier, hash
/// length). This static data is represented by a single tag in the `enum`.
///
/// Nearly all Filecoin CIDs are `V1`,`DagCbor` encoded, and hashed with `Blake2b256` (which has a hash
/// length of 256 bits). Naively representing such a CID requires 96 bytes but `CidVariant` does it in
/// only 40 bytes. If other types of CID become popular, they can be added to the `CidVariant`
/// length of 256 bits). Naively representing such a CID requires 96 bytes but `SmallCid` does it in
/// only 40 bytes. If other types of CID become popular, they can be added to the `SmallCid`
/// structure.
///
/// The `Generic` variant is used for CIDs that do not fit into the other variants.
/// These variants are used for optimizing storage of CIDs in the `FrozenCids` structure.
#[cfg_attr(test, derive(derive_quickcheck_arbitrary::Arbitrary))]
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub enum CidVariant {
pub struct SmallCid(SmallCidInner);

impl SmallCid {
    /// Expands this compact representation back into a full [`Cid`].
    ///
    /// A `Generic` value yields a copy of the boxed CID. A `V1DagCborBlake2b` value is rebuilt
    /// as a V1 CID with the `DAG_CBOR` codec, wrapping the stored digest in a `Blake2b256`
    /// multihash.
    ///
    /// # Panics
    ///
    /// Panics if `Multihash::wrap` rejects the stored digest.
    pub fn cid(&self) -> Cid {
        // The generic case already holds a complete CID, so hand it back directly.
        let blake2b_digest = match &self.0 {
            SmallCidInner::Generic(boxed) => return **boxed,
            SmallCidInner::V1DagCborBlake2b(bytes) => bytes,
        };
        let hash = multihash::Multihash::wrap(Blake2b256.into(), blake2b_digest)
            .expect("failed to convert Blake2b digest to Multihash for creation of V1 DAG-CBOR Blake2b CID");
        Cid::new_v1(DAG_CBOR, hash)
    }
}

/// Private representation behind [`SmallCid`].
///
/// The enum is not `pub`, so code outside this module works with `SmallCid` rather than
/// naming these variants directly.
#[cfg_attr(test, derive(derive_quickcheck_arbitrary::Arbitrary))]
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
enum SmallCidInner {
/// Fallback for any CID that does not match a dedicated variant. The full [`Cid`] is kept
/// behind a `Box` (presumably so this rare case does not inflate the size of the enum).
Generic(Box<Cid>),
/// A V1, DAG-CBOR CID hashed with Blake2b-256. Only the digest bytes are stored; the
/// version, codec and hash code are implied by the variant itself.
V1DagCborBlake2b(
// Test-only generator: fills each byte of the digest with its own arbitrary value.
#[cfg_attr(test, arbitrary(gen(|g: &mut quickcheck::Gen| std::array::from_fn(|_ix| Arbitrary::arbitrary(g)))))]
[u8; BLAKE2B256_SIZE],
),
}

impl Serialize for CidVariant {
impl Serialize for SmallCid {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
Cid::from(self).serialize(serializer)
self.cid().serialize(serializer)
}
}

impl<'de> Deserialize<'de> for CidVariant {
impl<'de> Deserialize<'de> for SmallCid {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: Deserializer<'de>,
Expand All @@ -70,42 +87,42 @@ impl<'de> Deserialize<'de> for CidVariant {
}
}

impl From<Cid> for CidVariant {
impl From<Cid> for SmallCid {
fn from(cid: Cid) -> Self {
if cid.version() == Version::V1 && cid.codec() == DAG_CBOR {
if let Ok(small_hash) = cid.hash().resize() {
let (code, bytes, size) = small_hash.into_inner();
if code == u64::from(Code::Blake2b256) && size as usize == BLAKE2B256_SIZE {
return CidVariant::V1DagCborBlake2b(bytes);
return SmallCid(SmallCidInner::V1DagCborBlake2b(bytes));
}
}
}
CidVariant::Generic(Box::new(cid))
SmallCid(SmallCidInner::Generic(Box::new(cid)))
}
}

impl From<CidVariant> for Cid {
fn from(variant: CidVariant) -> Self {
impl From<SmallCid> for Cid {
fn from(variant: SmallCid) -> Self {
Cid::from(&variant)
}
}

impl From<&CidVariant> for Cid {
fn from(variant: &CidVariant) -> Self {
impl From<&SmallCid> for Cid {
fn from(variant: &SmallCid) -> Self {
match variant {
CidVariant::Generic(cid) => **cid,
CidVariant::V1DagCborBlake2b(digest) => Cid::new_v1(
SmallCid(SmallCidInner::Generic(cid)) => **cid,
SmallCid(SmallCidInner::V1DagCborBlake2b(digest)) => Cid::new_v1(
DAG_CBOR,
multihash::Multihash::wrap(Blake2b256.into(), digest)
.expect("failed to convert Blake2b digest to V1 DAG-CBOR Blake2b CID"),
.expect("failed to convert Blake2b digest to Multihash for creation of V1 DAG-CBOR Blake2b CID"),
),
}
}
}

#[cfg(test)]
mod tests {
use super::CidVariant;
use super::SmallCid;
use super::*;
use crate::db::MemoryDB;
use crate::utils::db::CborStoreExt;
Expand Down Expand Up @@ -155,18 +172,18 @@ mod tests {
.unwrap();
assert!(matches!(
cid.try_into().unwrap(),
CidVariant::V1DagCborBlake2b(_)
SmallCid(SmallCidInner::V1DagCborBlake2b(_))
));
}

// If this test fails, the default encoding is no longer v1+dagcbor+blake2b. Add the new default
// CID type to `CidVariant`.
// If this test fails, the default encoding is no longer v1+dagcbor+blake2b. Consider adding the new default
// CID type to `SmallCid`.
#[test]
fn default_is_v1_dagcbor() {
let cid = MemoryDB::default().put_cbor_default(&()).unwrap();
assert!(matches!(
cid.try_into().unwrap(),
CidVariant::V1DagCborBlake2b(_)
SmallCid(SmallCidInner::V1DagCborBlake2b(_))
));
}
}