From 287e26032e3f1bd3aa1dec13b8c410b5ca12c478 Mon Sep 17 00:00:00 2001 From: ananas-block Date: Sun, 15 Jun 2025 16:34:14 +0100 Subject: [PATCH] perf: optimize bloom filter hashing --- Cargo.lock | 18 +++++++++++------- program-libs/bloom-filter/Cargo.toml | 2 +- program-libs/bloom-filter/src/lib.rs | 23 ++++++++++++++--------- program-libs/hasher/Cargo.toml | 1 + program-libs/hasher/src/keccak.rs | 26 +------------------------- 5 files changed, 28 insertions(+), 42 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c88c5a087c..bfcd778e82 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2030,12 +2030,6 @@ dependencies = [ "wide", ] -[[package]] -name = "fastmurmur3" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2d7e9bc68be4cdabbb8938140b01a8b5bc1191937f2c7e7ecc2fcebbe2d749df" - [[package]] name = "fastrand" version = "2.3.0" @@ -3270,11 +3264,11 @@ name = "light-bloom-filter" version = "0.2.0" dependencies = [ "bitvec", - "fastmurmur3", "light-hasher", "num-bigint 0.4.6", "pinocchio", "rand 0.8.5", + "solana-nostd-keccak", "solana-program-error", "thiserror 2.0.12", ] @@ -3428,6 +3422,7 @@ dependencies = [ "rand 0.8.5", "sha2 0.10.9", "sha3", + "solana-nostd-keccak", "solana-program-error", "solana-pubkey", "thiserror 2.0.12", @@ -6893,6 +6888,15 @@ dependencies = [ "solana-sdk-ids", ] +[[package]] +name = "solana-nostd-keccak" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d8ced70920435b1baa58f76e6f84bbc1110ddd1d6161ec76b6d731ae8431e9c4" +dependencies = [ + "sha3", +] + [[package]] name = "solana-offchain-message" version = "2.2.1" diff --git a/program-libs/bloom-filter/Cargo.toml b/program-libs/bloom-filter/Cargo.toml index 5ccf6a9214..e7314b666c 100644 --- a/program-libs/bloom-filter/Cargo.toml +++ b/program-libs/bloom-filter/Cargo.toml @@ -12,7 +12,7 @@ pinocchio = ["dep:pinocchio"] [dependencies] bitvec = "1.0.1" -fastmurmur3 = "0.2.0" +solana-nostd-keccak = "0.1.3" num-bigint = { workspace = true } solana-program-error = { workspace = true, optional = true } pinocchio = { workspace = true, optional = true } diff --git a/program-libs/bloom-filter/src/lib.rs b/program-libs/bloom-filter/src/lib.rs index f3bd8384d0..d5375bd82e 100644 --- a/program-libs/bloom-filter/src/lib.rs +++ b/program-libs/bloom-filter/src/lib.rs @@ -68,15 +68,20 @@ impl<'a> BloomFilter<'a> { }) } - pub fn probe_index_fast_murmur(value_bytes: &[u8], iteration: usize, capacity: &u64) -> usize { - let iter_bytes = iteration.to_le_bytes(); - let base_hash = fastmurmur3::hash(value_bytes); - let mut combined_bytes = [0u8; 24]; - combined_bytes[..16].copy_from_slice(&base_hash.to_le_bytes()); - combined_bytes[16..].copy_from_slice(&iter_bytes); + pub fn probe_index_keccak(value_bytes: &[u8; 32], iteration: usize, capacity: &u64) -> usize { + let iter_bytes: [u8; 8] = iteration.to_le_bytes(); + let mut combined_bytes = [0u8; 40]; + combined_bytes[..32].copy_from_slice(value_bytes); + combined_bytes[32..].copy_from_slice(&iter_bytes); - let combined_hash = fastmurmur3::hash(&combined_bytes); - (combined_hash % (*capacity as u128)) as usize + let hash = solana_nostd_keccak::hash(&combined_bytes); + + let mut index = 0u64; + for chunk in hash.chunks(8) { + let value = u64::from_le_bytes(chunk.try_into().unwrap()); + index = value.wrapping_add(index) % *capacity; + } + index as usize } pub fn insert(&mut self, value: &[u8; 32]) -> Result<(), BloomFilterError> { @@ -98,7 +103,7 @@ impl<'a> BloomFilter<'a> { let bits = BitSlice::::from_slice_mut(self.store); for i in 0..self.num_iters { - let probe_index = Self::probe_index_fast_murmur(value, i, &(self.capacity)); + let probe_index = Self::probe_index_keccak(value, i, &(self.capacity)); if bits[probe_index] { continue; } else if insert { diff --git a/program-libs/hasher/Cargo.toml b/program-libs/hasher/Cargo.toml index 5f1c4cbb4b..4798f967bd 100644 --- a/program-libs/hasher/Cargo.toml +++ b/program-libs/hasher/Cargo.toml @@ -23,6 +23,7 @@ solana-program-error = { workspace = true, optional = true } solana-pubkey = { workspace = true, optional = true } pinocchio = { workspace = true, optional = true } borsh = { workspace = true } +solana-nostd-keccak = "0.1.3" [target.'cfg(not(target_os = "solana"))'.dependencies] ark-bn254 = { workspace = true } diff --git a/program-libs/hasher/src/keccak.rs b/program-libs/hasher/src/keccak.rs index 1f16278bfc..81d81d810c 100644 --- a/program-libs/hasher/src/keccak.rs +++ b/program-libs/hasher/src/keccak.rs @@ -14,31 +14,7 @@ impl Hasher for Keccak { } fn hashv(vals: &[&[u8]]) -> Result { - #[cfg(not(target_os = "solana"))] - { - use sha3::{Digest, Keccak256}; - - let mut hasher = Keccak256::default(); - for val in vals { - hasher.update(val); - } - Ok(hasher.finalize().into()) - } - // Call via a system call to perform the calculation - #[cfg(target_os = "solana")] - { - use crate::HASH_BYTES; - - let mut hash_result = [0; HASH_BYTES]; - unsafe { - crate::syscalls::sol_keccak256( - vals as *const _ as *const u8, - vals.len() as u64, - &mut hash_result as *mut _ as *mut u8, - ); - } - Ok(hash_result) - } + Ok(solana_nostd_keccak::hashv(vals)) } fn zero_bytes() -> ZeroBytes {