diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index e07e7b726..8a4f81f96 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -34,7 +34,7 @@ jobs: with: files: ./coverage-output/lcov.info name: unit_tests - fail_ci_if_error: true + fail_ci_if_error: false # Build Stacks 2.1 for tests that require stacks-node build-stacks-2_1: runs-on: ubuntu-latest @@ -120,7 +120,7 @@ jobs: with: files: ./coverage-output/lcov.info name: ${{ matrix.test-name }} - fail_ci_if_error: true + fail_ci_if_error: false # Run integration tests integration-tests: runs-on: ubuntu-latest @@ -137,7 +137,7 @@ jobs: with: files: ./coverage-output/lcov.info name: integration_tests - fail_ci_if_error: true + fail_ci_if_error: false open-api-validation: @@ -196,6 +196,7 @@ jobs: uses: codecov/codecov-action@v1 with: files: ./coverage.lcov + fail_ci_if_error: false verbose: true # Build docker image, tag it with the git tag and `latest` if running on master branch, and publish under the following conditions diff --git a/Cargo.lock b/Cargo.lock index 29ce53e1f..ace8d1674 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2637,6 +2637,19 @@ version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f2dd574626839106c320a323308629dcb1acfc96e32a8cba364ddc61ac23ee83" +[[package]] +name = "soar-db" +version = "0.1.0" +dependencies = [ + "clarity", + "lazy_static", + "serde", + "serde_derive", + "serde_json", + "slog", + "stacks-common", +] + [[package]] name = "socket2" version = "0.4.4" diff --git a/Cargo.toml b/Cargo.toml index 25392b35e..34fcefa35 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -165,4 +165,4 @@ sha2 = { version = "0.10", features = ["asm"] } sha2 = { version = "0.10" } [workspace] -members = [".", "testnet/stacks-node", "testnet/puppet-chain"] +members = [".", "testnet/stacks-node", "testnet/puppet-chain", "soar-db"] diff --git a/soar-db/Cargo.toml b/soar-db/Cargo.toml new file mode 100644 index 000000000..4a94d7f44 --- 
/dev/null
+++ b/soar-db/Cargo.toml
@@ -0,0 +1,24 @@
+[package]
+name = "soar-db"
+version = "0.1.0"
+authors = ["Aaron Blankstein "]
+edition = "2021"
+resolver = "2"
+
+[dependencies]
+lazy_static = "1.4.0"
+serde = "1"
+serde_derive = "1"
+serde_json = { version = "1.0", features = ["arbitrary_precision", "raw_value"] }
+slog = { version = "2.5.2", features = [ "max_level_trace" ] }
+clarity = { git = "https://github.com/stacks-network/stacks-blockchain.git", rev = "580e811223a389d38071dcb9fde79ab55cd6f685" }
+stacks-common = { git = "https://github.com/stacks-network/stacks-blockchain.git", rev = "580e811223a389d38071dcb9fde79ab55cd6f685" }
+# clarity = { package = "clarity", path = "../../../stacks-blockchain-develop/clarity" }
+# stacks-common = { package = "stacks-common", path = "../stacks-blockchain-develop/stacks-common" }
+
+[lib]
+name = "soar_db"
+path = "src/lib.rs"
+
+[features]
+default = []
diff --git a/soar-db/src/lib.rs b/soar-db/src/lib.rs
new file mode 100644
index 000000000..9e64320f8
--- /dev/null
+++ b/soar-db/src/lib.rs
@@ -0,0 +1,246 @@
+//! (S)ubnets (O)ptimistic (A)daptive (R)eplay DB
+//!
+//! The SoarDB is an optimistic fork-aware data store (a replacement
+//! for the MARF used in stacks-blockchain).
+//!
+//! The general idea with the datastore is to store the current data
+//! view as a normal key-value store and track the history of
+//! operations on the storage. When a fork occurs, the data state is
+//! unwound and then replayed.
+
+extern crate clarity;
+extern crate stacks_common;
+
+use crate::memory::MemoryBackingStore;
+use stacks_common::types::chainstate::StacksBlockId;
+
+pub mod memory;
+
+#[cfg(test)]
+pub mod tests;
+
+/// Placeholder trait for backing stores: it declares no methods, and nothing in
+/// this crate implements it yet.
+pub trait SoarBackingStore {}
+
+/// Key-Value Store with edit log
+pub struct SoarDB {
+    storage: MemoryBackingStore,
+}
+
+/// Captures a key-value store's "put" operation, but is
+/// *reversible*. The `prior_value` field stores the entry
+/// being written over.
+#[derive(Clone)]
+pub struct PutCommand {
+    key: String,
+    /// If a prior value existed for this entry, store it here
+    /// If this is the first value for the key, this will be None
+    prior_value: Option<String>,
+    value: String,
+}
+
+/// Errors returned by [`SoarDB`] and its backing store.
+#[derive(PartialEq, Debug)]
+pub enum SoarError {
+    /// A parent was requested for the height-zero block, or a block was stored
+    /// before its parent
+    NoParentBlock(&'static str),
+    /// The referenced block is not known to the backing store
+    BlockNotFound(StacksBlockId),
+    /// Genesis was added to a store that already holds data
+    GenesisRewriteAttempted,
+    /// A child block's height would not fit in a `u64`
+    BlockHeightOverflow,
+    /// An undo was requested for a block that is not the current block
+    MismatchViewDuringRollback,
+    /// An operation needed a current block, but the store has none
+    RollbackBeyondGenesis,
+}
+
+impl SoarDB {
+    /// Create an empty database on top of a fresh [`MemoryBackingStore`].
+    pub fn new_memory() -> SoarDB {
+        SoarDB {
+            storage: MemoryBackingStore::new(),
+        }
+    }
+
+    /// If the DB has a block, then the current block should be returned
+    /// If there is *no* block data yet, this will return none
+    pub fn current_block(&self) -> Option<&StacksBlockId> {
+        self.storage.current_block()
+    }
+
+    /// Look up `key` in the current data view; `Ok(None)` means the key has no entry
+    pub fn get_value(&self, key: &str) -> Result<Option<String>, SoarError> {
+        self.storage.get_value(key)
+    }
+
+    /// Copy of the current block id, or `RollbackBeyondGenesis` if there is none
+    fn require_current_block(&self) -> Result<StacksBlockId, SoarError> {
+        self.current_block()
+            .cloned()
+            .ok_or(SoarError::RollbackBeyondGenesis)
+    }
+
+    /// Retarget the db to `block`, performing any unrolls or replays required to do so
+    ///
+    /// # Errors
+    /// * `BlockNotFound` if `block` or a visited ancestor is not stored
+    /// * `RollbackBeyondGenesis` if the db has no current block
+    /// * `NoParentBlock` if an ancestor walk reaches height zero before the two
+    ///   ancestor paths meet
+    ///
+    /// # Panics
+    /// Panics if stored data is inconsistent: a parent whose height is not one below
+    /// its child's, a replayed block whose parent is not the current block, or an
+    /// undo that finds an unexpected entry value
+    pub fn set_block(&mut self, block: &StacksBlockId) -> Result<(), SoarError> {
+        // already pointed at the block, just return
+        if self.current_block() == Some(block) {
+            return Ok(());
+        }
+
+        // this block requires a rollback!
+        // Step 1: find the "fork point", which is the most recent common ancestor
+        //  of `block` and `current_block()`
+        //
+        // We will do this by using the *block height* to walk backwards until the
+        //  two ancestor paths meet. First, we find ancestors at the same height,
+        //  then we loop until the ancestors are equal.
+
+        if !self.storage.has_block(block) {
+            return Err(SoarError::BlockNotFound(block.clone()));
+        }
+
+        // `current_block()` can still be `None` here, so report that as an error
+        //  rather than unwrapping
+        let mut ancestor_a = self.require_current_block()?;
+        let mut ancestor_b = block.clone();
+        let mut ancestor_a_ht = self.storage.get_block_height(&ancestor_a)?;
+        let mut ancestor_b_ht = self.storage.get_block_height(&ancestor_b)?;
+
+        // we track the path of "ancestor b" so that we know what needs to be
+        //  applied to get from the `fork_point` to `block`
+        let mut ancestors_b = vec![block.clone()];
+
+        while ancestor_a_ht != ancestor_b_ht {
+            if ancestor_a_ht > ancestor_b_ht {
+                (ancestor_a, ancestor_a_ht) = self.get_block_parent(&ancestor_a, ancestor_a_ht)?;
+            } else {
+                (ancestor_b, ancestor_b_ht) = self.get_block_parent(&ancestor_b, ancestor_b_ht)?;
+                ancestors_b.push(ancestor_b.clone());
+            }
+        }
+
+        while ancestor_a != ancestor_b {
+            (ancestor_a, ancestor_a_ht) = self.get_block_parent(&ancestor_a, ancestor_a_ht)?;
+            (ancestor_b, ancestor_b_ht) = self.get_block_parent(&ancestor_b, ancestor_b_ht)?;
+            // blocks passed on b's side while both paths step back together must be
+            //  replayed too; without this push a fork point more than one block
+            //  below both tips would leave gaps in the replay list
+            ancestors_b.push(ancestor_b.clone());
+        }
+
+        let fork_point = ancestor_a;
+
+        // fix the ancestors_b list so that it contains all the blocks
+        //  that need to be applied starting from `fork_point` to
+        //  reach `block`. To do this, we check if the tail of the list is equal
+        //  to the `fork_point`, and if so, remove it. This could result in a zero-length
+        //  list if `block` == `fork_point`.
+        if ancestors_b.last() == Some(&fork_point) {
+            ancestors_b.pop();
+        }
+
+        // Now, we have the most recent common ancestor (ancestor_a == ancestor_b)
+        // We can now execute Step 2: undo from the current block to the common ancestor
+        let mut current_block = self.require_current_block()?;
+        while current_block != fork_point {
+            self.storage.undo_block(&current_block)?;
+            current_block = self.require_current_block()?;
+        }
+
+        // Step 3: apply all the blocks from `fork_point` through to `block`, and then
+        //  apply the new block
+        for block_to_apply in ancestors_b.iter().rev() {
+            current_block = self.require_current_block()?;
+            let parent_block = self.storage.get_block_parent(block_to_apply)?;
+            assert_eq!(
+                current_block, parent_block,
+                "Failed while replaying operations: expected parent and current block to align"
+            );
+            self.storage.reapply_block(block_to_apply)?;
+        }
+
+        current_block = self.require_current_block()?;
+        assert_eq!(
+            &current_block, block,
+            "Failed while replaying operations: expected current block to align to block"
+        );
+
+        Ok(())
+    }
+
+    /// Fetch the parent of `block` and its block height, checking that it matches `block_ht - 1`
+    fn get_block_parent(
+        &self,
+        block: &StacksBlockId,
+        block_ht: u64,
+    ) -> Result<(StacksBlockId, u64), SoarError> {
+        if block_ht == 0 {
+            return Err(SoarError::NoParentBlock("No parent at zero-block"));
+        }
+        let parent = self.storage.get_block_parent(block)?;
+        let parent_ht = self.storage.get_block_height(&parent)?;
+        assert_eq!(block_ht - 1, parent_ht);
+        Ok((parent, parent_ht))
+    }
+
+    /// Store `block` as the genesis block, apply its `put_list`, and make it current.
+    ///
+    /// Returns `GenesisRewriteAttempted` unless the backing store is completely empty.
+    pub fn add_genesis(
+        &mut self,
+        block: StacksBlockId,
+        put_list: Vec<PutCommand>,
+    ) -> Result<(), SoarError> {
+        if !self.storage.is_empty()? {
+            return Err(SoarError::GenesisRewriteAttempted);
+        }
+
+        self.storage
+            .store_genesis_block(block.clone(), put_list.clone())?;
+        for put in put_list {
+            self.storage.apply_put(put);
+        }
+
+        self.storage.set_current_block(block);
+
+        Ok(())
+    }
+
+    /// Store `block` as a child of `parent`, apply its `put_list`, and make it current.
+    ///
+    /// The db is retargeted at `parent` first, so an error from that step is returned
+    /// before `block` is stored.
+    pub fn add_block_ops(
+        &mut self,
+        block: StacksBlockId,
+        parent: StacksBlockId,
+        put_list: Vec<PutCommand>,
+    ) -> Result<(), SoarError> {
+        // if needed, target the DB at the block's parent
+        self.set_block(&parent)?;
+
+        // then store and apply the block
+        self.storage
+            .store_block_data(block.clone(), parent, put_list.clone())?;
+        for put in put_list {
+            self.storage.apply_put(put);
+        }
+        self.storage.set_current_block(block);
+        Ok(())
+    }
+}
diff --git a/soar-db/src/memory.rs b/soar-db/src/memory.rs
new file mode 100644
index 000000000..90b702fff
--- /dev/null
+++ b/soar-db/src/memory.rs
@@ -0,0 +1,231 @@
+//! In-memory backing storage option for SoarDB. This backing store
+//! is transient, so any data stored in this will be lost when the
+//! process exits.
+
+use std::collections::HashMap;
+
+use crate::SoarError;
+
+use super::PutCommand;
+use stacks_common::types::chainstate::StacksBlockId;
+
+/// Stored record for one block: its edit log and its place in the block tree
+pub struct BlockData {
+    /// Put operations of the block, in the order they are applied
+    put_log: Vec<PutCommand>,
+    /// Parent block id; `None` for the genesis block
+    parent: Option<StacksBlockId>,
+    /// Genesis is stored with height 0, every other block with its parent's height + 1
+    height: u64,
+    #[allow(dead_code)]
+    id: StacksBlockId,
+}
+
+/// Backing store that keeps the current key-value view and all block data in memory
+pub struct MemoryBackingStore {
+    /// Block that `entries` currently reflects, if any
+    current_block: Option<StacksBlockId>,
+    /// The key-value view at `current_block`
+    entries: HashMap<String, String>,
+    /// Every stored block, keyed by its id
+    blocks: HashMap<StacksBlockId, BlockData>,
+}
+
+impl MemoryBackingStore {
+    /// Create a store with no blocks, no entries, and no current block
+    pub fn new() -> Self {
+        MemoryBackingStore {
+            current_block: None,
+            entries: HashMap::new(),
+            blocks: HashMap::new(),
+        }
+    }
+
+    /// Whether data for `block` has been stored
+    pub fn has_block(&self, block: &StacksBlockId) -> bool {
+        self.blocks.contains_key(block)
+    }
+
+    /// Apply the stored put log of `block` to the view and make `block` current.
+    ///
+    /// This does not check that the view is at `block`'s parent; callers must ensure it.
+    pub fn reapply_block(&mut self, block: &StacksBlockId) -> Result<(), SoarError> {
+        let block_data = self
+            .blocks
+            .get(block)
+            .ok_or_else(|| SoarError::BlockNotFound(block.clone()))?;
+
+        // the log is cloned so the borrow of `self.blocks` ends before `apply_put`
+        for command in block_data.put_log.clone() {
+            self.apply_put(command);
+        }
+
+        self.set_current_block(block.clone());
+
+        Ok(())
+    }
+
+    /// Undo the put log of the current block and move the view back to its parent.
+    ///
+    /// Returns `MismatchViewDuringRollback` unless `expected_cur_block` is the current block.
+    ///
+    /// # Panics
+    /// Panics if the current block has no stored data, or if an entry does not hold the
+    /// value that the put being undone wrote.
+    pub fn undo_block(&mut self, expected_cur_block: &StacksBlockId) -> Result<(), SoarError> {
+        // an unset current block never equals `Some(_)`, so this covers that case too
+        if self.current_block.as_ref() != Some(expected_cur_block) {
+            return Err(SoarError::MismatchViewDuringRollback);
+        }
+
+        let block_data = self
+            .blocks
+            .get(expected_cur_block)
+            .expect("Could not find block data for current block");
+        let parent = block_data.parent.clone();
+
+        // undo each operation in reverse order from the edit log
+        for put_command in block_data.put_log.clone().into_iter().rev() {
+            self.undo_put(put_command);
+        }
+
+        // operations are undone, now set the current_block to the parent
+        self.current_block = parent;
+
+        Ok(())
+    }
+
+    /// Look up `key` in the current view; `Ok(None)` means the key has no entry
+    pub fn get_value(&self, key: &str) -> Result<Option<String>, SoarError> {
+        Ok(self.entries.get(key).cloned())
+    }
+
+    /// Parent id of `block`: `BlockNotFound` if `block` is not stored, `NoParentBlock`
+    /// if it was stored without a parent
+    pub fn get_block_parent(&self, block: &StacksBlockId) -> Result<StacksBlockId, SoarError> {
+        let data = self
+            .blocks
+            .get(block)
+            .ok_or_else(|| SoarError::BlockNotFound(block.clone()))?;
+        data.parent
+            .clone()
+            .ok_or(SoarError::NoParentBlock("No parent at zero-block"))
+    }
+
+    /// Stored height of `block`, or `BlockNotFound` if `block` is not stored
+    pub fn get_block_height(&self, block: &StacksBlockId) -> Result<u64, SoarError> {
+        self.blocks
+            .get(block)
+            .map(|data| data.height)
+            .ok_or_else(|| SoarError::BlockNotFound(block.clone()))
+    }
+
+    /// Mark `block` as the block the view reflects; the entries are not touched
+    pub fn set_current_block(&mut self, block: StacksBlockId) {
+        self.current_block = Some(block);
+    }
+
+    /// The block the view currently reflects, if any
+    pub fn current_block(&self) -> Option<&StacksBlockId> {
+        self.current_block.as_ref()
+    }
+
+    /// True only when there is no current block, no stored block, and no entry
+    pub fn is_empty(&self) -> Result<bool, SoarError> {
+        Ok(self.current_block.is_none() && self.blocks.is_empty() && self.entries.is_empty())
+    }
+
+    /// Record `block` as the parentless block at height 0, with `put_log` as its edit log.
+    /// The puts are only stored here, not applied to the view.
+    ///
+    /// Returns `GenesisRewriteAttempted` if a current block is already set.
+    ///
+    /// # Panics
+    /// Panics if data for `block` was already stored.
+    pub fn store_genesis_block(
+        &mut self,
+        block: StacksBlockId,
+        put_log: Vec<PutCommand>,
+    ) -> Result<(), SoarError> {
+        if self.current_block.is_some() {
+            return Err(SoarError::GenesisRewriteAttempted);
+        }
+
+        let prior = self.blocks.insert(
+            block.clone(),
+            BlockData {
+                id: block,
+                parent: None,
+                put_log,
+                height: 0,
+            },
+        );
+        assert!(
+            prior.is_none(),
+            "Stored block data over an existing block entry"
+        );
+
+        Ok(())
+    }
+
+    /// Record `block` as a child of `parent`, one above the parent's height, with
+    /// `put_log` as its edit log. The puts are only stored here, not applied to the view.
+    ///
+    /// Returns `NoParentBlock` if `parent` is not stored and `BlockHeightOverflow` if the
+    /// parent's height is already `u64::MAX`.
+    ///
+    /// # Panics
+    /// Panics if data for `block` was already stored.
+    pub fn store_block_data(
+        &mut self,
+        block: StacksBlockId,
+        parent: StacksBlockId,
+        put_log: Vec<PutCommand>,
+    ) -> Result<(), SoarError> {
+        let parent_height = self
+            .blocks
+            .get(&parent)
+            .map(|parent_data| parent_data.height)
+            .ok_or(SoarError::NoParentBlock("Parent block has not been processed yet"))?;
+
+        let prior = self.blocks.insert(
+            block.clone(),
+            BlockData {
+                id: block,
+                parent: Some(parent),
+                put_log,
+                height: parent_height
+                    .checked_add(1)
+                    .ok_or(SoarError::BlockHeightOverflow)?,
+            },
+        );
+        assert!(
+            prior.is_none(),
+            "Stored block data over an existing block entry"
+        );
+        Ok(())
+    }
+
+    /// Write `command.value` under `command.key`, replacing any existing entry
+    pub fn apply_put(&mut self, command: PutCommand) {
+        self.entries.insert(command.key, command.value);
+    }
+
+    /// Reverse `command`: restore `prior_value` under the key, or remove the entry
+    /// if there was no prior value.
+    ///
+    /// # Panics
+    /// Panics if the entry did not hold `command.value` before the undo.
+    pub fn undo_put(&mut self, command: PutCommand) {
+        let old_value = if let Some(old_value) = command.prior_value {
+            self.entries.insert(command.key, old_value)
+        } else {
+            self.entries.remove(&command.key)
+        };
+        assert_eq!(
+            old_value,
+            Some(command.value),
+            "Undo operation applied to an entry that had an unexpected value"
+        );
+    }
+}
diff --git a/soar-db/src/tests.rs b/soar-db/src/tests.rs
new file mode 100644
index 000000000..7a536a420
--- /dev/null
+++ b/soar-db/src/tests.rs
@@ -0,0 +1,208 @@
+//! Unit tests for the SoarDB implementation
+
+use clarity::types::chainstate::StacksBlockId;
+
+use crate::{PutCommand, SoarDB};
+
+/// use the current value in db to create a prior_value
+/// for a put command
+fn make_put(db: &SoarDB, k: &str, v: &str) -> PutCommand {
+    let prior_value = db.get_value(k).unwrap();
+    PutCommand {
+        key: k.to_string(),
+        prior_value,
+        value: v.to_string(),
+    }
+}
+
+#[test]
+fn simple_storage_chain() {
+    let mut db = SoarDB::new_memory();
+    db.add_genesis(StacksBlockId([1; 32]), vec![make_put(&db, "A", "1")])
+        .unwrap();
+    assert_eq!(db.get_value("A"), Ok(Some("1".into())));
+
+    db.add_block_ops(
+        StacksBlockId([2; 32]),
+        StacksBlockId([1; 32]),
+        vec![
+            make_put(&db, "B", "2"),
+            make_put(&db, "C", "2"),
+            make_put(&db, "D", "2"),
+        ],
+    )
+    .unwrap();
+
+    assert_eq!(db.get_value("A"), Ok(Some("1".into())));
+    assert_eq!(db.get_value("B"), Ok(Some("2".into())));
+    assert_eq!(db.get_value("C"), Ok(Some("2".into())));
+    assert_eq!(db.get_value("D"), Ok(Some("2".into())));
+
+    db.add_block_ops(
+        StacksBlockId([3; 32]),
+        StacksBlockId([2; 32]),
+        vec![
+            make_put(&db, "B", "3"),
+            make_put(&db, "C", "3"),
+            make_put(&db, "D", "3"),
+        ],
+    )
+    .unwrap();
+
+    assert_eq!(db.get_value("A"), Ok(Some("1".into())));
+    assert_eq!(db.get_value("B"), Ok(Some("3".into())));
+    assert_eq!(db.get_value("C"), Ok(Some("3".into())));
+    assert_eq!(db.get_value("D"), Ok(Some("3".into())));
+}
+
+/// Test forking from a longer chain (1 -> 2 -> 3 -> 4)
+/// to a shorter chain (1 -> 2 -> 3) and then back again
+#[test]
+fn fork_to_shorter_chain() {
+    let mut db = SoarDB::new_memory();
+    db.add_genesis(StacksBlockId([1; 32]), vec![make_put(&db, "A", "1")])
+        .unwrap();
+    assert_eq!(db.get_value("A"), Ok(Some("1".into())));
+
+    db.add_block_ops(
+        StacksBlockId([2; 32]),
+        StacksBlockId([1; 32]),
+        vec![
+            make_put(&db, "B", "2"),
+            make_put(&db, "C", "2"),
+            make_put(&db, "D", "2"),
+        ],
+    )
+    .unwrap();
+
+    assert_eq!(db.get_value("A"), Ok(Some("1".into())));
+    assert_eq!(db.get_value("B"), Ok(Some("2".into())));
+    assert_eq!(db.get_value("C"), Ok(Some("2".into())));
+    assert_eq!(db.get_value("D"), Ok(Some("2".into())));
+
+    // these puts will be applied in a different fork
+    let fork_ops = vec![
+        make_put(&db, "B", "f3"),
+        make_put(&db, "E", "f3"),
+        make_put(&db, "A", "f3"),
+    ];
+
+    db.add_block_ops(
+        StacksBlockId([3; 32]),
+        StacksBlockId([2; 32]),
+        vec![
+            make_put(&db, "B", "3"),
+            make_put(&db, "C", "3"),
+            make_put(&db, "Z", "3"),
+        ],
+    )
+    .unwrap();
+
+    assert_eq!(db.get_value("A"), Ok(Some("1".into())));
+    assert_eq!(db.get_value("B"), Ok(Some("3".into())));
+    assert_eq!(db.get_value("C"), Ok(Some("3".into())));
+    assert_eq!(db.get_value("D"), Ok(Some("2".into())));
+    assert_eq!(db.get_value("Z"), Ok(Some("3".into())));
+    assert_eq!(db.get_value("E"), Ok(None));
+
+    db.add_block_ops(
+        StacksBlockId([4; 32]),
+        StacksBlockId([3; 32]),
+        vec![
+            make_put(&db, "B", "4"),
+            make_put(&db, "C", "4"),
+            make_put(&db, "D", "4"),
+        ],
+    )
+    .unwrap();
+
+    assert_eq!(db.get_value("A"), Ok(Some("1".into())));
+    assert_eq!(db.get_value("B"), Ok(Some("4".into())));
+    assert_eq!(db.get_value("C"), Ok(Some("4".into())));
+    assert_eq!(db.get_value("D"), Ok(Some("4".into())));
+    assert_eq!(db.get_value("Z"), Ok(Some("3".into())));
+    assert_eq!(db.get_value("E"), Ok(None));
+
+    // these ops will be applied when we fork back
+    let fork_back_ops = vec![make_put(&db, "C", "5")];
+
+    db.add_block_ops(StacksBlockId([13; 32]), StacksBlockId([2; 32]), fork_ops)
+        .unwrap();
+
+    assert_eq!(db.get_value("A"), Ok(Some("f3".into())));
+    assert_eq!(db.get_value("B"), Ok(Some("f3".into())));
+    assert_eq!(db.get_value("E"), Ok(Some("f3".into())));
+    assert_eq!(db.get_value("C"), Ok(Some("2".into())));
+    assert_eq!(db.get_value("D"), Ok(Some("2".into())));
+    assert_eq!(db.get_value("Z"), Ok(None));
+
+    db.add_block_ops(
+        StacksBlockId([5; 32]),
+        StacksBlockId([4; 32]),
+        fork_back_ops,
+    )
+    .unwrap();
+
+    assert_eq!(db.get_value("A"), Ok(Some("1".into())));
+    assert_eq!(db.get_value("B"), Ok(Some("4".into())));
+    assert_eq!(db.get_value("C"), Ok(Some("5".into())));
+    assert_eq!(db.get_value("D"), Ok(Some("4".into())));
+    assert_eq!(db.get_value("Z"), Ok(Some("3".into())));
+    assert_eq!(db.get_value("E"), Ok(None));
+}
+
+/// Test forking between two equal-length chains (1 -> 2 -> 3 and
+/// 1 -> 12 -> 13) whose fork point is more than one block below both
+/// tips, so several blocks must be replayed on the target side
+#[test]
+fn fork_across_deep_fork_point() {
+    let mut db = SoarDB::new_memory();
+    db.add_genesis(StacksBlockId([1; 32]), vec![make_put(&db, "A", "1")])
+        .unwrap();
+
+    db.add_block_ops(
+        StacksBlockId([2; 32]),
+        StacksBlockId([1; 32]),
+        vec![make_put(&db, "B", "2")],
+    )
+    .unwrap();
+    db.add_block_ops(
+        StacksBlockId([3; 32]),
+        StacksBlockId([2; 32]),
+        vec![make_put(&db, "B", "3")],
+    )
+    .unwrap();
+
+    // build the sibling chain on top of the genesis block
+    db.set_block(&StacksBlockId([1; 32])).unwrap();
+    db.add_block_ops(
+        StacksBlockId([12; 32]),
+        StacksBlockId([1; 32]),
+        vec![make_put(&db, "C", "f2")],
+    )
+    .unwrap();
+    db.add_block_ops(
+        StacksBlockId([13; 32]),
+        StacksBlockId([12; 32]),
+        vec![make_put(&db, "C", "f3")],
+    )
+    .unwrap();
+
+    assert_eq!(db.get_value("A"), Ok(Some("1".into())));
+    assert_eq!(db.get_value("B"), Ok(None));
+    assert_eq!(db.get_value("C"), Ok(Some("f3".into())));
+
+    // both tips have the same height and the fork point is two blocks back
+    db.set_block(&StacksBlockId([3; 32])).unwrap();
+    assert_eq!(db.current_block(), Some(&StacksBlockId([3; 32])));
+    assert_eq!(db.get_value("A"), Ok(Some("1".into())));
+    assert_eq!(db.get_value("B"), Ok(Some("3".into())));
+    assert_eq!(db.get_value("C"), Ok(None));
+
+    // and back again
+    db.set_block(&StacksBlockId([13; 32])).unwrap();
+    assert_eq!(db.current_block(), Some(&StacksBlockId([13; 32])));
+    assert_eq!(db.get_value("A"), Ok(Some("1".into())));
+    assert_eq!(db.get_value("B"), Ok(None));
+    assert_eq!(db.get_value("C"), Ok(Some("f3".into())));
+}