From d81a85ce690797cf89756ad44f6d31ded4eb9954 Mon Sep 17 00:00:00 2001
From: Julian Eager
Date: Sat, 14 Dec 2024 01:41:30 +0800
Subject: [PATCH] feat: auto per-shard cache size limit across resharding (#12605)

Address https://github.com/near/nearcore/blob/649743f6cb94474644b0cd5d0693b4fe5b277a0f/core/store/src/config.rs#L214-L215
by deriving the per-shard cache limits from known access patterns (per
contract account) so that the limits can stay relevant after resharding.
---
 core/primitives/src/epoch_manager.rs |  4 +-
 core/store/src/config.rs             | 68 +++++++++++++++++++---------
 core/store/src/trie/trie_storage.rs  | 21 ++++++---
 3 files changed, 64 insertions(+), 29 deletions(-)

diff --git a/core/primitives/src/epoch_manager.rs b/core/primitives/src/epoch_manager.rs
index 3fb693ed1fe..491f0ae05d6 100644
--- a/core/primitives/src/epoch_manager.rs
+++ b/core/primitives/src/epoch_manager.rs
@@ -523,7 +523,7 @@ macro_rules! include_config {
 
 /// List of (chain_id, version, JSON content) tuples used to initialize the EpochConfigStore.
 static CONFIGS: &[(&str, ProtocolVersion, &str)] = &[
-    // Epoch configs for mainnet (genesis protool version is 29).
+    // Epoch configs for mainnet (genesis protocol version is 29).
     include_config!("mainnet", 29, "29.json"),
     include_config!("mainnet", 48, "48.json"),
     include_config!("mainnet", 56, "56.json"),
@@ -536,7 +536,7 @@ static CONFIGS: &[(&str, ProtocolVersion, &str)] = &[
     include_config!("mainnet", 100, "100.json"),
     include_config!("mainnet", 101, "101.json"),
     include_config!("mainnet", 143, "143.json"),
-    // Epoch configs for testnet (genesis protool version is 29).
+    // Epoch configs for testnet (genesis protocol version is 29).
     include_config!("testnet", 29, "29.json"),
     include_config!("testnet", 48, "48.json"),
     include_config!("testnet", 56, "56.json"),
diff --git a/core/store/src/config.rs b/core/store/src/config.rs
index b2ac03dbe27..ff0798bd1ae 100644
--- a/core/store/src/config.rs
+++ b/core/store/src/config.rs
@@ -2,9 +2,25 @@ use crate::trie::{
     DEFAULT_SHARD_CACHE_DELETIONS_QUEUE_CAPACITY, DEFAULT_SHARD_CACHE_TOTAL_SIZE_LIMIT,
 };
 use crate::DBCol;
-use near_primitives::shard_layout::ShardUId;
+use near_primitives::chains::MAINNET;
+use near_primitives::epoch_manager::EpochConfigStore;
+use near_primitives::shard_layout::{account_id_to_shard_uid, ShardLayout, ShardUId};
+use near_primitives::types::AccountId;
+use near_primitives::version::{ProtocolFeature, PROTOCOL_VERSION};
 use near_time::Duration;
-use std::{collections::HashMap, iter::FromIterator};
+use std::{collections::HashMap, str::FromStr};
+
+// Known cache access patterns per prominent contract account,
+// used to derive the config's `per_shard_max_bytes` defaults.
+const PER_ACCOUNT_CACHE_SIZE: &[(&'static str, bytesize::ByteSize)] = &[
+    // aurora has its own dedicated shard and it had very few cache misses
+    // even with a cache size of only 50MB
+    ("aurora", bytesize::ByteSize::mb(50)),
+    // the size was chosen by estimating the storage size of the largest
+    // contract (token.sweat) we were aware of as of 23/08/2022
+    // Note: on nearcore version >= 1.34, use 1gb if you have minimal hardware
+    ("token.sweat", bytesize::ByteSize::gb(3)),
+];
 
 #[derive(Clone, Debug, serde::Serialize, serde::Deserialize)]
 #[serde(default)]
@@ -174,6 +190,34 @@ impl StoreConfig {
             _ => bytesize::ByteSize::mib(32),
         }
     }
+
+    fn default_per_shard_max_bytes() -> HashMap<ShardUId, bytesize::ByteSize> {
+        let epoch_config_store = EpochConfigStore::for_chain_id(MAINNET, None).unwrap();
+        let mut shard_layouts: Vec<ShardLayout> = Vec::new();
+        // Ideally we should use the protocol version from the current epoch config as the start of
+        // the range, but the store should not need to depend on knowledge of the current epoch.
+        let start_version =
+            PROTOCOL_VERSION.min(ProtocolFeature::SimpleNightshadeV4.protocol_version() - 1);
+        for protocol_version in start_version..=PROTOCOL_VERSION {
+            let epoch_config = epoch_config_store.get_config(protocol_version);
+            let shard_layout = epoch_config.shard_layout.clone();
+            // O(n) is fine as the list is short
+            if !shard_layouts.contains(&shard_layout) {
+                shard_layouts.push(shard_layout);
+            }
+        }
+
+        let mut per_shard_max_bytes: HashMap<ShardUId, bytesize::ByteSize> = HashMap::new();
+        for (account_id, bytes) in PER_ACCOUNT_CACHE_SIZE.iter() {
+            let account_id = AccountId::from_str(account_id)
+                .expect("the hardcoded account id is guaranteed to be valid");
+            for shard_layout in shard_layouts.iter() {
+                let shard_uid = account_id_to_shard_uid(&account_id, &shard_layout);
+                per_shard_max_bytes.insert(shard_uid, *bytes);
+            }
+        }
+        per_shard_max_bytes
+    }
 }
 
 impl Default for StoreConfig {
@@ -211,25 +255,7 @@ impl Default for StoreConfig {
 
             trie_cache: TrieCacheConfig {
                 default_max_bytes: bytesize::ByteSize::mb(500),
-                // TODO(resharding) The cache size needs to adjusted for every resharding.
-                // Make that automatic e.g. by defining the minimum cache size per account rather than shard.
-                per_shard_max_bytes: HashMap::from_iter([
-                    // Temporary solution to make contracts with heavy trie access
-                    // patterns on shard 3 more stable. It was chosen by the estimation
-                    // of the largest contract storage size we are aware as of 23/08/2022.
-                    // Note: on >= 1.34 nearcore version use 1gb if you have minimal hardware.
-                    // In simple nightshade the heavy contract "token.sweat" is in shard 3
-                    (ShardUId { version: 1, shard_id: 3 }, bytesize::ByteSize::gb(3)),
-                    // In simple nightshade v2 the heavy contract "token.sweat" is in shard 4
-                    (ShardUId { version: 2, shard_id: 4 }, bytesize::ByteSize::gb(3)),
-                    // In simple nightshade v3 the heavy contract "token.sweat" is in shard 5
-                    (ShardUId { version: 3, shard_id: 5 }, bytesize::ByteSize::gb(3)),
-                    // Shard 1 is dedicated to aurora and it had very few cache
-                    // misses even with cache size of only 50MB
-                    (ShardUId { version: 1, shard_id: 1 }, bytesize::ByteSize::mb(50)),
-                    (ShardUId { version: 2, shard_id: 1 }, bytesize::ByteSize::mb(50)),
-                    (ShardUId { version: 3, shard_id: 1 }, bytesize::ByteSize::mb(50)),
-                ]),
+                per_shard_max_bytes: Self::default_per_shard_max_bytes(),
                 shard_cache_deletions_queue_capacity: DEFAULT_SHARD_CACHE_DELETIONS_QUEUE_CAPACITY,
             },
 
diff --git a/core/store/src/trie/trie_storage.rs b/core/store/src/trie/trie_storage.rs
index 0cd8c098d4b..d9240e68611 100644
--- a/core/store/src/trie/trie_storage.rs
+++ b/core/store/src/trie/trie_storage.rs
@@ -613,7 +613,7 @@ mod trie_cache_tests {
     use crate::trie::trie_storage::TrieCacheInner;
     use crate::{StoreConfig, TrieCache, TrieConfig};
     use near_primitives::hash::hash;
-    use near_primitives::shard_layout::ShardUId;
+    use near_primitives::shard_layout::{ShardUId, ShardVersion};
     use near_primitives::types::ShardId;
 
     fn put_value(cache: &mut TrieCacheInner, value: &[u8]) {
@@ -702,20 +702,29 @@ mod trie_cache_tests {
         store_config.view_trie_cache.per_shard_max_bytes.insert(s0, S0_VIEW_SIZE);
 
         let trie_config = TrieConfig::from_store_config(&store_config);
-        check_cache_size(&trie_config, ShardId::new(1), false, DEFAULT_SIZE);
-        check_cache_size(&trie_config, ShardId::new(0), false, S0_SIZE);
-        check_cache_size(&trie_config, ShardId::new(1), true, DEFAULT_VIEW_SIZE);
-        check_cache_size(&trie_config, ShardId::new(0), true, S0_VIEW_SIZE);
+        check_cache_size(&trie_config, 0, ShardId::new(1), false, DEFAULT_SIZE);
+        check_cache_size(&trie_config, 0, ShardId::new(0), false, S0_SIZE);
+        check_cache_size(&trie_config, 0, ShardId::new(1), true, DEFAULT_VIEW_SIZE);
+        check_cache_size(&trie_config, 0, ShardId::new(0), true, S0_VIEW_SIZE);
+    }
+
+    #[test]
+    fn test_default_per_shard_max_bytes() {
+        let store_config = StoreConfig::default();
+        let trie_config = TrieConfig::from_store_config(&store_config);
+        check_cache_size(&trie_config, 3, ShardId::new(1), false, bytesize::ByteSize::mb(50));
+        check_cache_size(&trie_config, 3, ShardId::new(5), false, bytesize::ByteSize::gb(3));
     }
 
     #[track_caller]
     fn check_cache_size(
         trie_config: &TrieConfig,
+        version: ShardVersion,
         shard_id: ShardId,
         is_view: bool,
         expected_size: bytesize::ByteSize,
     ) {
-        let shard_uid = ShardUId::new(0, shard_id);
+        let shard_uid = ShardUId::new(version, shard_id);
         let trie_cache = TrieCache::new(&trie_config, shard_uid, is_view);
         assert_eq!(expected_size.as_u64(), trie_cache.lock().total_size_limit);
         assert_eq!(is_view, trie_cache.lock().is_view);
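
The derivation in default_per_shard_max_bytes hinges on one mapping:
account_id_to_shard_uid resolves, for any given shard layout, the shard that
hosts an account, which is why per-account limits can follow a contract across
reshardings. A minimal sketch of that lookup, using only APIs that already
appear in this diff (the helper name shard_for is hypothetical):

    use std::str::FromStr;

    use near_primitives::shard_layout::{account_id_to_shard_uid, ShardLayout, ShardUId};
    use near_primitives::types::AccountId;

    // Resolve the shard that hosts `account` under `layout`, so a per-account
    // cache limit can be attached to whichever shard holds the account after
    // a resharding changes the layout.
    fn shard_for(account: &str, layout: &ShardLayout) -> ShardUId {
        let account_id = AccountId::from_str(account).expect("valid account id");
        account_id_to_shard_uid(&account_id, layout)
    }

Calling shard_for("token.sweat", layout) for each known layout reproduces the
old hardcoded entries: shard 3 under simple nightshade v1, shard 4 under v2,
and shard 5 under v3.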