From 9e1a51c7e392000fbc2d8cc1119c9b38a319089d Mon Sep 17 00:00:00 2001 From: Evgeny Fomin Date: Wed, 22 Dec 2021 18:45:31 +0300 Subject: [PATCH 1/3] prefixed storage raw iterator --- storage/src/rocksdb_storage.rs | 101 ++++++++++++++++++++++++++++++--- 1 file changed, 92 insertions(+), 9 deletions(-) diff --git a/storage/src/rocksdb_storage.rs b/storage/src/rocksdb_storage.rs index 541e4cb8a..66b8ec2a6 100644 --- a/storage/src/rocksdb_storage.rs +++ b/storage/src/rocksdb_storage.rs @@ -96,7 +96,7 @@ impl PrefixedRocksDbStorage { impl Storage for PrefixedRocksDbStorage { type Batch<'a> = PrefixedRocksDbBatch<'a>; type Error = PrefixedRocksDbStorageError; - type RawIterator<'a> = rocksdb::DBRawIterator<'a>; + type RawIterator<'a> = RawPrefixedIterator<'a>; fn put(&self, key: &[u8], value: &[u8]) -> Result<(), Self::Error> { self.db @@ -191,33 +191,56 @@ impl Storage for PrefixedRocksDbStorage { } fn raw_iter<'a>(&'a self) -> Self::RawIterator<'a> { - self.db.raw_iterator() + RawPrefixedIterator { + rocksdb_iterator: self.db.raw_iterator(), + prefix: &self.prefix, + } } } -impl RawIterator for rocksdb::DBRawIterator<'_> { +pub struct RawPrefixedIterator<'a> { + rocksdb_iterator: DBRawIterator<'a>, + prefix: &'a [u8], +} + +impl RawIterator for RawPrefixedIterator<'_> { fn seek_to_first(&mut self) { - DBRawIterator::seek_to_first(self) + self.rocksdb_iterator.seek(self.prefix); } fn seek(&mut self, key: &[u8]) { - DBRawIterator::seek(self, key) + self.rocksdb_iterator + .seek(make_prefixed_key(self.prefix.to_vec(), key)); } fn next(&mut self) { - DBRawIterator::next(self) + self.rocksdb_iterator.next(); } fn value(&self) -> Option<&[u8]> { - DBRawIterator::value(self) + if self.valid() { + self.rocksdb_iterator.value() + } else { + None + } } fn key(&self) -> Option<&[u8]> { - DBRawIterator::key(self) + if self.valid() { + self.rocksdb_iterator + .key() + .map(|k| k.split_at(self.prefix.len()).1) + } else { + None + } } fn valid(&self) -> bool { - DBRawIterator::valid(self) + // TODO: things may break if we continue concat prefix and key just like that + self.rocksdb_iterator + .key() + .map(|k| k.starts_with(self.prefix)) + .unwrap_or(false) } } @@ -488,4 +511,64 @@ mod tests { b"yeet" ); } + + #[test] + fn test_raw_iterator() { + let tmp_dir = TempDir::new("test_raw_iterator").expect("unable to open a tempdir"); + let db = default_rocksdb(tmp_dir.path()); + + let storage = PrefixedRocksDbStorage::new(db.clone(), b"someprefix".to_vec()) + .expect("cannot create a prefixed storage"); + storage + .put(b"key1", b"value1") + .expect("expected successful insertion"); + storage + .put(b"key0", b"value0") + .expect("expected successful insertion"); + storage + .put(b"key3", b"value3") + .expect("expected successful insertion"); + storage + .put(b"key2", b"value2") + .expect("expected successful insertion"); + + // Other storages are required to put something into rocksdb with other prefix + // to see if there will be any conflicts and boundaries are met + let another_storage_before = + PrefixedRocksDbStorage::new(db.clone(), b"anothersomeprefix".to_vec()) + .expect("cannot create a prefixed storage"); + another_storage_before + .put(b"key1", b"value1") + .expect("expected successful insertion"); + another_storage_before + .put(b"key5", b"value5") + .expect("expected successful insertion"); + let another_storage_after = PrefixedRocksDbStorage::new(db, b"zanothersomeprefix".to_vec()) + .expect("cannot create a prefixed storage"); + another_storage_after + .put(b"key1", b"value1") + .expect("expected successful insertion"); + another_storage_after + .put(b"key5", b"value5") + .expect("expected successful insertion"); + + let expected: [(&'static [u8], &'static [u8]); 4] = [ + (b"key0", b"value0"), + (b"key1", b"value1"), + (b"key2", b"value2"), + (b"key3", b"value3"), + ]; + let mut expected_iter = expected.into_iter(); + + let mut iter = storage.raw_iter(); + iter.seek_to_first(); + while iter.valid() { + assert_eq!( + (iter.key().unwrap(), iter.value().unwrap()), + expected_iter.next().unwrap() + ); + iter.next(); + } + assert!(expected_iter.next().is_none()); + } } From af2722e75a3d4fad1213c9e035519ca57bdf8865 Mon Sep 17 00:00:00 2001 From: Evgeny Fomin Date: Wed, 22 Dec 2021 21:44:50 +0300 Subject: [PATCH 2/3] elements iterator for GroveDB subtree --- grovedb/src/lib.rs | 8 ++++++ grovedb/src/subtree.rs | 36 +++++++++++++++++++++++-- grovedb/src/tests.rs | 61 ++++++++++++++++++++++++++++++++++++++++++ merk/src/merk/mod.rs | 7 +++-- 4 files changed, 108 insertions(+), 4 deletions(-) diff --git a/grovedb/src/lib.rs b/grovedb/src/lib.rs index 2aa0c0f0c..7d66cc50a 100644 --- a/grovedb/src/lib.rs +++ b/grovedb/src/lib.rs @@ -235,6 +235,14 @@ impl GroveDb { } } + pub fn elements_iterator(&self, path: &[&[u8]]) -> Result { + let merk = self + .subtrees + .get(&Self::compress_path(path, None)) + .ok_or(Error::InvalidPath("no subtree found under that path"))?; + Ok(Element::iterator(merk.raw_iter())) + } + /// Get tree item without following references fn get_raw(&self, path: &[&[u8]], key: &[u8]) -> Result { let merk = self diff --git a/grovedb/src/subtree.rs b/grovedb/src/subtree.rs index 5e479be32..95997a540 100644 --- a/grovedb/src/subtree.rs +++ b/grovedb/src/subtree.rs @@ -1,9 +1,12 @@ //! Module for subtrees handling. //! Subtrees handling is isolated so basically this module is about adapting //! Merk API to GroveDB needs. -use merk::Op; +use merk::{tree::Tree, Op}; use serde::{Deserialize, Serialize}; -use storage::rocksdb_storage::PrefixedRocksDbStorage; +use storage::{ + rocksdb_storage::{PrefixedRocksDbStorage, RawPrefixedIterator}, + RawIterator, Store, +}; use crate::{Error, Merk}; @@ -56,6 +59,35 @@ impl Element { merk.apply(&batch, &[]) .map_err(|e| Error::CorruptedData(e.to_string())) } + + pub fn iterator(mut raw_iter: RawPrefixedIterator) -> ElementsIterator { + raw_iter.seek_to_first(); + ElementsIterator { raw_iter } + } +} + +pub struct ElementsIterator<'a> { + raw_iter: RawPrefixedIterator<'a>, +} + +impl<'a> ElementsIterator<'a> { + pub fn next(&mut self) -> Result, Error> { + Ok(if self.raw_iter.valid() { + self.raw_iter.next(); + if let Some((key, value)) = self.raw_iter.key().zip(self.raw_iter.value()) { + let tree = ::decode(value) + .map_err(|e| Error::CorruptedData(e.to_string()))?; + let element: Element = bincode::deserialize(tree.value()).map_err(|_| { + Error::CorruptedData(String::from("unable to deserialize element")) + })?; + Some((key, element)) + } else { + None + } + } else { + None + }) + } } #[cfg(test)] diff --git a/grovedb/src/tests.rs b/grovedb/src/tests.rs index ee03c7ae2..c46381f59 100644 --- a/grovedb/src/tests.rs +++ b/grovedb/src/tests.rs @@ -407,3 +407,64 @@ fn test_insert_if_not_exists() { ); assert!(matches!(result, Err(Error::InvalidPath(_)))); } + +#[test] +fn test_subtree_pairs_iterator() { + let mut db = make_grovedb(); + let element = Element::Item(b"ayy".to_vec()); + let element2 = Element::Item(b"lmao".to_vec()); + + // Insert some nested subtrees + db.insert(&[TEST_LEAF], b"subtree1".to_vec(), Element::empty_tree()) + .expect("successful subtree 1 insert"); + db.insert( + &[TEST_LEAF, b"subtree1"], + b"subtree11".to_vec(), + Element::empty_tree(), + ) + .expect("successful subtree 2 insert"); + // Insert an element into subtree + db.insert( + &[TEST_LEAF, b"subtree1", b"subtree11"], + b"key1".to_vec(), + element.clone(), + ) + .expect("successful value insert"); + assert_eq!( + db.get(&[TEST_LEAF, b"subtree1", b"subtree11"], b"key1") + .expect("succesful get 1"), + element + ); + db.insert( + &[TEST_LEAF, b"subtree1", b"subtree11"], + b"key0".to_vec(), + element.clone(), + ) + .expect("successful value insert"); + db.insert( + &[TEST_LEAF, b"subtree1"], + b"subtree12".to_vec(), + Element::empty_tree(), + ) + .expect("successful subtree 3 insert"); + db.insert(&[TEST_LEAF, b"subtree1"], b"key1".to_vec(), element.clone()) + .expect("succesful value insert"); + db.insert( + &[TEST_LEAF, b"subtree1"], + b"key2".to_vec(), + element2.clone(), + ) + .expect("succesful value insert"); + + // Iterate over subtree1 to see if keys of other subtrees messed up + let mut iter = db + .elements_iterator(&[TEST_LEAF, b"subtree1"]) + .expect("cannot create iterator"); + assert!(matches!(iter.next(), Ok(Some((b"key1", element1))))); + assert!(matches!(iter.next(), Ok(Some((b"key2", element2))))); + assert!(matches!( + iter.next(), + Ok(Some((b"subtree11", Element::Tree(_)))) + )); + // assert!(matches!(iter.next(), Ok(None))); +} diff --git a/merk/src/merk/mod.rs b/merk/src/merk/mod.rs index e77c422ee..00ff9d6c3 100644 --- a/merk/src/merk/mod.rs +++ b/merk/src/merk/mod.rs @@ -372,7 +372,10 @@ impl Commit for MerkCommitter { #[cfg(test)] mod test { - use storage::rocksdb_storage::{default_rocksdb, PrefixedRocksDbStorage}; + use storage::{ + rocksdb_storage::{default_rocksdb, PrefixedRocksDbStorage}, + RawIterator, + }; use tempdir::TempDir; use super::{Merk, MerkSource, RefWalker}; @@ -539,7 +542,7 @@ mod test { #[test] fn reopen_iter() { - fn collect(iter: &mut rocksdb::DBRawIterator, nodes: &mut Vec<(Vec, Vec)>) { + fn collect(iter: &mut impl RawIterator, nodes: &mut Vec<(Vec, Vec)>) { while iter.valid() { nodes.push((iter.key().unwrap().to_vec(), iter.value().unwrap().to_vec())); iter.next(); From faae8ae9e16fdcd9396a9babe4a8f5149722023d Mon Sep 17 00:00:00 2001 From: Evgeny Fomin Date: Thu, 23 Dec 2021 21:04:58 +0300 Subject: [PATCH 3/3] prevent collisions for paths compression --- grovedb/src/lib.rs | 42 ++++++++++++++++++++++------------ grovedb/src/subtree.rs | 5 ++-- grovedb/src/tests.rs | 35 ++++++++++++++++++++-------- storage/src/rocksdb_storage.rs | 1 - 4 files changed, 55 insertions(+), 28 deletions(-) diff --git a/grovedb/src/lib.rs b/grovedb/src/lib.rs index 7d66cc50a..68049b7af 100644 --- a/grovedb/src/lib.rs +++ b/grovedb/src/lib.rs @@ -10,7 +10,7 @@ use std::{ pub use merk::proofs::{query::QueryItem, Query}; use merk::{self, Merk}; -use rs_merkle::{algorithms::Sha256, MerkleTree}; +use rs_merkle::{algorithms::Sha256, Hasher, MerkleTree}; use storage::{ rocksdb_storage::{PrefixedRocksDbStorage, PrefixedRocksDbStorageError}, Storage, @@ -141,13 +141,12 @@ impl GroveDb { key: Vec, mut element: subtree::Element, ) -> Result<(), Error> { - let compressed_path = Self::compress_path(path, None); match &mut element { Element::Tree(subtree_root_hash) => { // Helper closure to create a new subtree under path + key let create_subtree_merk = || -> Result<(Vec, Merk), Error> { - let compressed_path_subtree = Self::compress_path(path, Some(&key)); + let compressed_path_subtree = Self::compress_subtree_key(&path, Some(&key)); Ok(( compressed_path_subtree.clone(), Merk::open(PrefixedRocksDbStorage::new( @@ -173,6 +172,7 @@ impl GroveDb { } self.propagate_changes(&[&key])?; } else { + let compressed_path = Self::compress_subtree_key(path, None); // Add subtree to another subtree. // First, check if a subtree exists to create a new subtree under it self.subtrees @@ -204,7 +204,7 @@ impl GroveDb { // Get a Merk by a path let mut merk = self .subtrees - .get_mut(&compressed_path) + .get_mut(&Self::compress_subtree_key(path, None)) .ok_or(Error::InvalidPath("no subtree found under that path"))?; element.insert(&mut merk, key)?; self.propagate_changes(path)?; @@ -238,7 +238,7 @@ impl GroveDb { pub fn elements_iterator(&self, path: &[&[u8]]) -> Result { let merk = self .subtrees - .get(&Self::compress_path(path, None)) + .get(&Self::compress_subtree_key(path, None)) .ok_or(Error::InvalidPath("no subtree found under that path"))?; Ok(Element::iterator(merk.raw_iter())) } @@ -247,7 +247,7 @@ impl GroveDb { fn get_raw(&self, path: &[&[u8]], key: &[u8]) -> Result { let merk = self .subtrees - .get(&Self::compress_path(path, None)) + .get(&Self::compress_subtree_key(path, None)) .ok_or(Error::InvalidPath("no subtree found under that path"))?; Element::get(&merk, key) } @@ -296,7 +296,7 @@ impl GroveDb { // Get proof for root tree at current key let root_key_index = self .root_leaf_keys - .get(*key) + .get(&Self::compress_subtree_key(&[key], None)) .ok_or(Error::InvalidPath("root key not found"))?; proofs.push(self.root_tree.proof(&[*root_key_index]).to_bytes()); } else { @@ -319,7 +319,7 @@ impl GroveDb { fn prove_item(&self, path: &[&[u8]], proof_query: Query) -> Result, Error> { let merk = self .subtrees - .get(&Self::compress_path(path, None)) + .get(&Self::compress_subtree_key(path, None)) .ok_or(Error::InvalidPath("no subtree found under that path"))?; let proof_result = merk @@ -339,8 +339,8 @@ impl GroveDb { self.root_tree = Self::build_root_tree(&self.subtrees, &self.root_leaf_keys); break; } else { - let compressed_path_upper_tree = Self::compress_path(path_slice, None); - let compressed_path_subtree = Self::compress_path(path_slice, Some(key)); + let compressed_path_upper_tree = Self::compress_subtree_key(path_slice, None); + let compressed_path_subtree = Self::compress_subtree_key(path_slice, Some(key)); let subtree = self .subtrees .get(&compressed_path_subtree) @@ -359,14 +359,26 @@ impl GroveDb { /// A helper method to build a prefix to rocksdb keys or identify a subtree /// in `subtrees` map by tree path; - fn compress_path(path: &[&[u8]], key: Option<&[u8]>) -> Vec { - let mut res = path.iter().fold(Vec::::new(), |mut acc, p| { + fn compress_subtree_key(path: &[&[u8]], key: Option<&[u8]>) -> Vec { + let segments_iter = path.into_iter().map(|x| *x).chain(key.into_iter()); + let mut segments_count = path.len(); + if key.is_some() { + segments_count += 1; + } + let mut res = segments_iter.fold(Vec::::new(), |mut acc, p| { acc.extend(p.into_iter()); acc }); - if let Some(k) = key { - res.extend_from_slice(k); - } + + res.extend(segments_count.to_ne_bytes()); + path.into_iter() + .map(|x| *x) + .chain(key.into_iter()) + .fold(&mut res, |acc, p| { + acc.extend(p.len().to_ne_bytes()); + acc + }); + res = Sha256::hash(&res).to_vec(); res } } diff --git a/grovedb/src/subtree.rs b/grovedb/src/subtree.rs index 95997a540..e6fe576b2 100644 --- a/grovedb/src/subtree.rs +++ b/grovedb/src/subtree.rs @@ -71,15 +71,16 @@ pub struct ElementsIterator<'a> { } impl<'a> ElementsIterator<'a> { - pub fn next(&mut self) -> Result, Error> { + pub fn next(&mut self) -> Result, Element)>, Error> { Ok(if self.raw_iter.valid() { - self.raw_iter.next(); if let Some((key, value)) = self.raw_iter.key().zip(self.raw_iter.value()) { let tree = ::decode(value) .map_err(|e| Error::CorruptedData(e.to_string()))?; let element: Element = bincode::deserialize(tree.value()).map_err(|_| { Error::CorruptedData(String::from("unable to deserialize element")) })?; + let key = key.to_vec(); + self.raw_iter.next(); Some((key, element)) } else { None diff --git a/grovedb/src/tests.rs b/grovedb/src/tests.rs index c46381f59..cb0fccabd 100644 --- a/grovedb/src/tests.rs +++ b/grovedb/src/tests.rs @@ -235,8 +235,8 @@ fn test_tree_structure_is_presistent() { fn test_root_tree_leafs_are_noted() { let db = make_grovedb(); let mut hm = HashMap::new(); - hm.insert(TEST_LEAF.to_vec(), 0); - hm.insert(ANOTHER_TEST_LEAF.to_vec(), 1); + hm.insert(GroveDb::compress_subtree_key(&[TEST_LEAF], None), 0); + hm.insert(GroveDb::compress_subtree_key(&[ANOTHER_TEST_LEAF], None), 1); assert_eq!(db.root_leaf_keys, hm); assert_eq!(db.root_tree.leaves_len(), 2); } @@ -318,7 +318,6 @@ fn test_checkpoint() { .expect("cannot insert a subtree 2 into GroveDB"); db.insert(&[b"key1", b"key2"], b"key3".to_vec(), element1.clone()) .expect("cannot insert an item into GroveDB"); - assert_eq!( db.get(&[b"key1", b"key2"], b"key3") .expect("cannot get from grovedb"), @@ -460,11 +459,27 @@ fn test_subtree_pairs_iterator() { let mut iter = db .elements_iterator(&[TEST_LEAF, b"subtree1"]) .expect("cannot create iterator"); - assert!(matches!(iter.next(), Ok(Some((b"key1", element1))))); - assert!(matches!(iter.next(), Ok(Some((b"key2", element2))))); - assert!(matches!( - iter.next(), - Ok(Some((b"subtree11", Element::Tree(_)))) - )); - // assert!(matches!(iter.next(), Ok(None))); + assert_eq!(iter.next().unwrap(), Some((b"key1".to_vec(), element))); + assert_eq!(iter.next().unwrap(), Some((b"key2".to_vec(), element2))); + let subtree_element = iter.next().unwrap().unwrap(); + assert_eq!(subtree_element.0, b"subtree11".to_vec()); + assert!(matches!(subtree_element.1, Element::Tree(_))); + let subtree_element = iter.next().unwrap().unwrap(); + assert_eq!(subtree_element.0, b"subtree12".to_vec()); + assert!(matches!(subtree_element.1, Element::Tree(_))); + assert!(matches!(iter.next(), Ok(None))); +} + +#[test] +fn test_compress_path_not_possible_collision() { + let path_a = [b"aa".as_ref(), b"b"]; + let path_b = [b"a".as_ref(), b"ab"]; + assert_ne!( + GroveDb::compress_subtree_key(&path_a, None), + GroveDb::compress_subtree_key(&path_b, None) + ); + assert_eq!( + GroveDb::compress_subtree_key(&path_a, None), + GroveDb::compress_subtree_key(&path_a, None), + ); } diff --git a/storage/src/rocksdb_storage.rs b/storage/src/rocksdb_storage.rs index 66b8ec2a6..75725341d 100644 --- a/storage/src/rocksdb_storage.rs +++ b/storage/src/rocksdb_storage.rs @@ -236,7 +236,6 @@ impl RawIterator for RawPrefixedIterator<'_> { } fn valid(&self) -> bool { - // TODO: things may break if we continue concat prefix and key just like that self.rocksdb_iterator .key() .map(|k| k.starts_with(self.prefix))