From bea5ae34750485140ce27544b4af2298d35c18c1 Mon Sep 17 00:00:00 2001 From: Vladimir Petrzhikovskii Date: Thu, 28 Nov 2024 20:31:23 +0100 Subject: [PATCH 1/2] chore: add merkle update benchmark --- Cargo.toml | 5 ++ benches/callgrind_usage_cell.rs | 8 ++- benches/merkle_update.rs | 93 +++++++++++++++++++++++++++++++++ 3 files changed, 104 insertions(+), 2 deletions(-) create mode 100644 benches/merkle_update.rs diff --git a/Cargo.toml b/Cargo.toml index 5bbe218f..1e0fd34a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,6 +17,10 @@ harness = false name = "mine" harness = false +[[bench]] +name = "merkle_update" +harness = false + [[bench]] name = "dict_from_slice" harness = false @@ -94,6 +98,7 @@ serde_json = "1" iai-callgrind = "0.14" paste = "1.0.15" + [features] default = ["base64", "serde", "models", "sync"] sync = ["dep:scc"] diff --git a/benches/callgrind_usage_cell.rs b/benches/callgrind_usage_cell.rs index 80c08b38..245777d7 100644 --- a/benches/callgrind_usage_cell.rs +++ b/benches/callgrind_usage_cell.rs @@ -1,6 +1,8 @@ use everscale_types::cell::RefsIter; use everscale_types::prelude::*; -use iai_callgrind::{library_benchmark, library_benchmark_group, main}; +use iai_callgrind::{ + library_benchmark, library_benchmark_group, main, FlamegraphConfig, LibraryBenchmarkConfig, +}; use std::collections::HashSet; use std::hint::black_box; @@ -91,4 +93,6 @@ library_benchmark_group!( benchmarks = traverse_cell_ordinary, traverse_cell_storage_cell, traverse_cell_storage_cell_with_capacity ); -main!(library_benchmark_groups = traverse_cell); +main!(config =LibraryBenchmarkConfig::default() + .flamegraph(FlamegraphConfig::default()); + library_benchmark_groups = traverse_cell); diff --git a/benches/merkle_update.rs b/benches/merkle_update.rs new file mode 100644 index 00000000..9b428b14 --- /dev/null +++ b/benches/merkle_update.rs @@ -0,0 +1,93 @@ +use everscale_types::cell::{Cell, CellBuilder, HashBytes, UsageTree, UsageTreeMode}; +use everscale_types::dict::Dict; +use everscale_types::merkle::{MerkleUpdate, MerkleUpdateBuilder}; +use iai_callgrind::{ + library_benchmark, library_benchmark_group, main, FlamegraphConfig, LibraryBenchmarkConfig, +}; +use rand::prelude::{SliceRandom, StdRng}; +use rand::{Rng, SeedableRng}; + +fn size_for_different_dicts() -> (Dict, Vec) { + let value = (0..10000) + .map(|x| (x, Dict::::new())) + .collect::>(); + + let value = Dict::try_from_sorted_slice(&value) + .unwrap() + .into_root() + .unwrap(); + + let size = 1_000_000; + + let mut rng = StdRng::seed_from_u64(1337); + let mut keys: Vec = (0..size) + .map(|_| { + let mut key = [0u8; 32]; + rng.fill(&mut key[..]); + HashBytes::from(key) + }) + .collect(); + keys.sort_unstable(); + + let num_keys = 1000; + + let keys_to_check = keys + .choose_multiple(&mut rng, num_keys) + .copied() + .collect::>(); + + let keys_inner = keys.iter().map(|k| (*k, value.clone())).collect::>(); + let dict = Dict::try_from_sorted_slice(&keys_inner).unwrap(); + drop(keys); + + (dict, keys_to_check) +} + +fn build_update( + dict: &Dict, + keys_to_check: &[HashBytes], +) -> (Cell, Cell, UsageTree) { + let old_cell = CellBuilder::build_from(dict).unwrap(); + let usage_tree = UsageTree::new(UsageTreeMode::OnLoad); + let old_dict_cell_tracked = usage_tree.track(&old_cell); + + let mut dict = old_dict_cell_tracked + .parse::>() + .unwrap(); + + for (idx, key) in keys_to_check.iter().enumerate() { + let mut cell = CellBuilder::new(); + cell.store_u32(idx as _).unwrap(); + let cell = cell.build().unwrap(); + dict.set(key, cell.clone()).unwrap(); + } + + let new_dict_cell = CellBuilder::build_from(dict).unwrap(); + + (old_cell, new_dict_cell, usage_tree) +} + +fn prepare() -> (Cell, Cell, UsageTree) { + let (dict, keys_to_check) = size_for_different_dicts(); + build_update(&dict, &keys_to_check) +} + +#[library_benchmark] +#[bench::base(setup=prepare)] +fn merkle_update((prev, new, usage_tree): (Cell, Cell, UsageTree)) { + let mut merkle = MerkleUpdateBuilder::new(prev.as_ref(), new.as_ref(), usage_tree); + let update = merkle.build().unwrap(); + + std::mem::forget(update); + std::mem::forget(prev); + std::mem::forget(new); +} + +library_benchmark_group!( + + name = merkle; + benchmarks = merkle_update +); +main!(config =LibraryBenchmarkConfig::default() + .flamegraph(FlamegraphConfig::default()); + library_benchmark_groups = merkle); From d13f4fef35a69f2b75080c4d2e80863ae1768afe Mon Sep 17 00:00:00 2001 From: Vladimir Petrzhikovskii Date: Thu, 28 Nov 2024 20:31:23 +0100 Subject: [PATCH 2/2] perf: refactor merkle update --- src/cell/usage_tree.rs | 4 + src/merkle/mod.rs | 24 ++++++ src/merkle/proof.rs | 4 + src/merkle/update.rs | 168 +++++++++++++++++++++++------------------ 4 files changed, 126 insertions(+), 74 deletions(-) diff --git a/src/cell/usage_tree.rs b/src/cell/usage_tree.rs index 2fd4d5f4..f3af5057 100644 --- a/src/cell/usage_tree.rs +++ b/src/cell/usage_tree.rs @@ -102,6 +102,10 @@ impl UsageTreeWithSubtrees { pub fn add_subtree(&mut self, root: &DynCell) -> bool { self.subtrees.insert(*root.repr_hash()) } + + pub(crate) fn len(&self) -> usize { + self.state.len() + } } #[cfg(not(feature = "sync"))] diff --git a/src/merkle/mod.rs b/src/merkle/mod.rs index 794a37a2..8617c2c9 100644 --- a/src/merkle/mod.rs +++ b/src/merkle/mod.rs @@ -30,6 +30,9 @@ mod __checks { pub trait MerkleFilter { /// Returns how the cell should be included in the Merkle proof or update. fn check(&self, cell: &HashBytes) -> FilterAction; + + /// Returns the number of elements in the filter, if known. + fn size_hint(&self) -> Option; } /// Merkle filter action. @@ -48,6 +51,11 @@ impl MerkleFilter for &T { fn check(&self, cell: &HashBytes) -> FilterAction { ::check(self, cell) } + + #[inline] + fn size_hint(&self) -> Option { + ::size_hint(self) + } } impl MerkleFilter for UsageTree { @@ -58,6 +66,10 @@ impl MerkleFilter for UsageTree { FilterAction::Skip } } + + fn size_hint(&self) -> Option { + Some(UsageTree::len(self)) + } } impl MerkleFilter for UsageTreeWithSubtrees { @@ -70,6 +82,10 @@ impl MerkleFilter for UsageTreeWithSubtrees { FilterAction::Skip } } + + fn size_hint(&self) -> Option { + Some(self.len()) + } } impl MerkleFilter for HashSet { @@ -80,6 +96,10 @@ impl MerkleFilter for HashSet { FilterAction::Skip } } + + fn size_hint(&self) -> Option { + Some(self.len()) + } } impl MerkleFilter for HashSet<&HashBytes, S> { @@ -90,4 +110,8 @@ impl MerkleFilter for HashSet<&HashBytes, S> { FilterAction::Skip } } + + fn size_hint(&self) -> Option { + Some(self.len()) + } } diff --git a/src/merkle/proof.rs b/src/merkle/proof.rs index 6f9a8e54..a7776a8a 100644 --- a/src/merkle/proof.rs +++ b/src/merkle/proof.rs @@ -190,6 +190,10 @@ impl MerkleProof { FilterAction::Skip } } + + fn size_hint(&self) -> Option { + Some(self.cells.len()) + } } let mut stack = vec![root.references()]; diff --git a/src/merkle/update.rs b/src/merkle/update.rs index 5dcb4e45..ca3b07c9 100644 --- a/src/merkle/update.rs +++ b/src/merkle/update.rs @@ -486,78 +486,6 @@ struct BuilderImpl<'a, 'b> { impl<'a: 'b, 'b> BuilderImpl<'a, 'b> { fn build(self) -> Result { - struct Resolver<'a, S> { - pruned_branches: HashMap<&'a HashBytes, bool, S>, - visited: HashSet<&'a HashBytes, S>, - filter: &'a dyn MerkleFilter, - changed_cells: HashSet<&'a HashBytes, S>, - } - - impl<'a, S> Resolver<'a, S> - where - S: BuildHasher, - { - fn fill(&mut self, cell: &'a DynCell, mut skip_filter: bool) -> bool { - let repr_hash = cell.repr_hash(); - - // Skip visited cells - if self.visited.contains(repr_hash) { - return false; - } - self.visited.insert(repr_hash); - - let is_pruned = match self.pruned_branches.get_mut(repr_hash) { - Some(true) => return false, - Some(visited) => { - *visited = true; - true - } - None => false, - }; - - let process_children = if skip_filter { - true - } else { - match self.filter.check(repr_hash) { - FilterAction::Skip => false, - FilterAction::Include => true, - FilterAction::IncludeSubtree => { - skip_filter = true; - true - } - } - }; - - let mut result = false; - if process_children { - for child in cell.references() { - result |= self.fill(child, skip_filter); - } - - if result { - self.changed_cells.insert(repr_hash); - } - } - - result | is_pruned - } - } - - struct InvertedFilter(F); - - impl MerkleFilter for InvertedFilter { - #[inline] - fn check(&self, cell: &HashBytes) -> FilterAction { - if self.0.check(cell) == FilterAction::Skip { - // TODO: check if FilterAction::IncludeSubtree is correct, - // because it is more optimal to just include the new subtree - FilterAction::Include - } else { - FilterAction::Skip - } - } - } - let old_hash = self.old.repr_hash(); let old_depth = self.old.repr_depth(); let new_hash = self.new.repr_hash(); @@ -590,9 +518,15 @@ impl<'a: 'b, 'b> BuilderImpl<'a, 'b> { // Prepare cell diff resolver let mut resolver = Resolver { pruned_branches, - visited: Default::default(), + visited: HashSet::with_capacity_and_hasher( + self.filter.size_hint().unwrap_or(512), + Default::default(), + ), filter: self.filter, - changed_cells: Default::default(), + changed_cells: HashSet::with_capacity_and_hasher( + self.filter.size_hint().unwrap_or(512), + Default::default(), + ), }; // Find all changed cells in the old cell tree @@ -619,6 +553,92 @@ impl<'a: 'b, 'b> BuilderImpl<'a, 'b> { } } +struct Resolver<'a, S> { + pruned_branches: HashMap<&'a HashBytes, bool, S>, + visited: HashSet<&'a HashBytes, S>, + filter: &'a dyn MerkleFilter, + changed_cells: HashSet<&'a HashBytes, S>, +} + +impl<'a, S> Resolver<'a, S> +where + S: BuildHasher, +{ + fn fill(&mut self, cell: &'a DynCell, mut skip_filter: bool) -> bool { + let repr_hash = cell.repr_hash(); + + if !self.visited.insert(repr_hash) { + return false; + } + + let is_pruned = match self.pruned_branches.get_mut(repr_hash) { + Some(visited) => { + if *visited { + return false; + } + *visited = true; + true + } + None => false, + }; + + let (process_children, new_skip_filter) = if skip_filter { + (true, true) + } else { + match self.filter.check(repr_hash) { + FilterAction::Skip => (false, false), + FilterAction::Include => (true, false), + FilterAction::IncludeSubtree => (true, true), + } + }; + skip_filter = new_skip_filter; + + // Process children only if needed + if !process_children { + return is_pruned; + } + + let mut result = false; + let refs = cell.references(); + + // Process all references + for child in refs { + result |= self.fill(child, skip_filter); + + // early exit if we found changes and don't need to process all children + if result && !skip_filter { + break; + } + } + + // update changed cells if needed + if result { + self.changed_cells.insert(repr_hash); + } + + result | is_pruned + } +} + +struct InvertedFilter(F); + +impl MerkleFilter for InvertedFilter { + #[inline] + fn check(&self, cell: &HashBytes) -> FilterAction { + if self.0.check(cell) == FilterAction::Skip { + // TODO: check if FilterAction::IncludeSubtree is correct, + // because it is more optimal to just include the new subtree + FilterAction::Include + } else { + FilterAction::Skip + } + } + + fn size_hint(&self) -> Option { + self.0.size_hint() + } +} + #[cfg(test)] mod tests { use super::*;