Skip to content

Commit

Permalink
feat: auto per-shard cache size limit across resharding (#12605)
Browse files Browse the repository at this point in the history
Address
https://github.com/near/nearcore/blob/649743f6cb94474644b0cd5d0693b4fe5b277a0f/core/store/src/config.rs#L214-L215

by deriving shard cache limit from known access pattern (per contract
account) such that the limits can stay relevant after resharding
  • Loading branch information
eagr authored Dec 13, 2024
1 parent 8a8d60c commit d81a85c
Show file tree
Hide file tree
Showing 3 changed files with 64 additions and 29 deletions.
4 changes: 2 additions & 2 deletions core/primitives/src/epoch_manager.rs
Original file line number Diff line number Diff line change
Expand Up @@ -523,7 +523,7 @@ macro_rules! include_config {

/// List of (chain_id, version, JSON content) tuples used to initialize the EpochConfigStore.
static CONFIGS: &[(&str, ProtocolVersion, &str)] = &[
// Epoch configs for mainnet (genesis protool version is 29).
// Epoch configs for mainnet (genesis protocol version is 29).
include_config!("mainnet", 29, "29.json"),
include_config!("mainnet", 48, "48.json"),
include_config!("mainnet", 56, "56.json"),
Expand All @@ -536,7 +536,7 @@ static CONFIGS: &[(&str, ProtocolVersion, &str)] = &[
include_config!("mainnet", 100, "100.json"),
include_config!("mainnet", 101, "101.json"),
include_config!("mainnet", 143, "143.json"),
// Epoch configs for testnet (genesis protool version is 29).
// Epoch configs for testnet (genesis protocol version is 29).
include_config!("testnet", 29, "29.json"),
include_config!("testnet", 48, "48.json"),
include_config!("testnet", 56, "56.json"),
Expand Down
68 changes: 47 additions & 21 deletions core/store/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,25 @@ use crate::trie::{
DEFAULT_SHARD_CACHE_DELETIONS_QUEUE_CAPACITY, DEFAULT_SHARD_CACHE_TOTAL_SIZE_LIMIT,
};
use crate::DBCol;
use near_primitives::shard_layout::ShardUId;
use near_primitives::chains::MAINNET;
use near_primitives::epoch_manager::EpochConfigStore;
use near_primitives::shard_layout::{account_id_to_shard_uid, ShardLayout, ShardUId};
use near_primitives::types::AccountId;
use near_primitives::version::{ProtocolFeature, PROTOCOL_VERSION};
use near_time::Duration;
use std::{collections::HashMap, iter::FromIterator};
use std::{collections::HashMap, str::FromStr};

/// Known trie cache size requirements of prominent contract accounts.
///
/// Each entry pairs an account id with the cache size limit that should apply to
/// whichever shard currently holds that account; the entries are used to derive the
/// default value of the `per_shard_max_bytes` config.
const PER_ACCOUNT_CACHE_SIZE: &[(&str, bytesize::ByteSize)] = &[
    // aurora has its dedicated shard and it had very few cache misses even with
    // a cache size of only 50MB
    ("aurora", bytesize::ByteSize::mb(50)),
    // The size was chosen by estimating the storage size of the largest contract
    // (token.sweat) we were aware of as of 23/08/2022.
    // Note: on >= 1.34 nearcore version use 1gb if you have minimal hardware
    ("token.sweat", bytesize::ByteSize::gb(3)),
];

#[derive(Clone, Debug, serde::Serialize, serde::Deserialize)]
#[serde(default)]
Expand Down Expand Up @@ -174,6 +190,34 @@ impl StoreConfig {
_ => bytesize::ByteSize::mib(32),
}
}

/// Builds the default per-shard trie cache limits from `PER_ACCOUNT_CACHE_SIZE`.
///
/// Collects every distinct mainnet shard layout found in the epoch configs for the
/// protocol versions from `SimpleNightshadeV4 - 1` (capped at `PROTOCOL_VERSION`) up to
/// and including `PROTOCOL_VERSION`, then maps each listed account to the shard that
/// holds it in each of those layouts. The returned map therefore carries one entry per
/// (account, layout) pair, so the limits follow the accounts across resharding.
///
/// If two accounts resolve to the same `ShardUId`, the one listed later in
/// `PER_ACCOUNT_CACHE_SIZE` overwrites the earlier one.
///
/// # Panics
///
/// Panics if the mainnet epoch config store cannot be created or if a hardcoded
/// account id fails to parse.
fn default_per_shard_max_bytes() -> HashMap<ShardUId, bytesize::ByteSize> {
    let epoch_config_store = EpochConfigStore::for_chain_id(MAINNET, None)
        .expect("epoch configs for mainnet should always be available");

    // Ideally we should use the protocol version from current epoch config as start of
    // the range, but store should not need to depend on the knowledge of current epoch.
    let start_version =
        PROTOCOL_VERSION.min(ProtocolFeature::SimpleNightshadeV4.protocol_version() - 1);

    let mut shard_layouts: Vec<ShardLayout> = Vec::new();
    for protocol_version in start_version..=PROTOCOL_VERSION {
        let epoch_config = epoch_config_store.get_config(protocol_version);
        let shard_layout = &epoch_config.shard_layout;
        // A linear scan is fine as the list is short; clone only layouts not seen yet.
        if !shard_layouts.contains(shard_layout) {
            shard_layouts.push(shard_layout.clone());
        }
    }

    let mut per_shard_max_bytes: HashMap<ShardUId, bytesize::ByteSize> = HashMap::new();
    for &(account_id, max_bytes) in PER_ACCOUNT_CACHE_SIZE {
        let account_id = AccountId::from_str(account_id)
            .expect("the hardcoded account id should guarantee to be valid");
        for shard_layout in &shard_layouts {
            let shard_uid = account_id_to_shard_uid(&account_id, shard_layout);
            per_shard_max_bytes.insert(shard_uid, max_bytes);
        }
    }
    per_shard_max_bytes
}
}

impl Default for StoreConfig {
Expand Down Expand Up @@ -211,25 +255,7 @@ impl Default for StoreConfig {

trie_cache: TrieCacheConfig {
default_max_bytes: bytesize::ByteSize::mb(500),
// TODO(resharding) The cache size needs to adjusted for every resharding.
// Make that automatic e.g. by defining the minimum cache size per account rather than shard.
per_shard_max_bytes: HashMap::from_iter([
// Temporary solution to make contracts with heavy trie access
// patterns on shard 3 more stable. It was chosen by the estimation
// of the largest contract storage size we are aware as of 23/08/2022.
// Note: on >= 1.34 nearcore version use 1gb if you have minimal hardware.
// In simple nightshade the heavy contract "token.sweat" is in shard 3
(ShardUId { version: 1, shard_id: 3 }, bytesize::ByteSize::gb(3)),
// In simple nightshade v2 the heavy contract "token.sweat" is in shard 4
(ShardUId { version: 2, shard_id: 4 }, bytesize::ByteSize::gb(3)),
// In simple nightshade v3 the heavy contract "token.sweat" is in shard 5
(ShardUId { version: 3, shard_id: 5 }, bytesize::ByteSize::gb(3)),
// Shard 1 is dedicated to aurora and it had very few cache
// misses even with cache size of only 50MB
(ShardUId { version: 1, shard_id: 1 }, bytesize::ByteSize::mb(50)),
(ShardUId { version: 2, shard_id: 1 }, bytesize::ByteSize::mb(50)),
(ShardUId { version: 3, shard_id: 1 }, bytesize::ByteSize::mb(50)),
]),
per_shard_max_bytes: Self::default_per_shard_max_bytes(),
shard_cache_deletions_queue_capacity: DEFAULT_SHARD_CACHE_DELETIONS_QUEUE_CAPACITY,
},

Expand Down
21 changes: 15 additions & 6 deletions core/store/src/trie/trie_storage.rs
Original file line number Diff line number Diff line change
Expand Up @@ -613,7 +613,7 @@ mod trie_cache_tests {
use crate::trie::trie_storage::TrieCacheInner;
use crate::{StoreConfig, TrieCache, TrieConfig};
use near_primitives::hash::hash;
use near_primitives::shard_layout::ShardUId;
use near_primitives::shard_layout::{ShardUId, ShardVersion};
use near_primitives::types::ShardId;

fn put_value(cache: &mut TrieCacheInner, value: &[u8]) {
Expand Down Expand Up @@ -702,20 +702,29 @@ mod trie_cache_tests {
store_config.view_trie_cache.per_shard_max_bytes.insert(s0, S0_VIEW_SIZE);
let trie_config = TrieConfig::from_store_config(&store_config);

check_cache_size(&trie_config, ShardId::new(1), false, DEFAULT_SIZE);
check_cache_size(&trie_config, ShardId::new(0), false, S0_SIZE);
check_cache_size(&trie_config, ShardId::new(1), true, DEFAULT_VIEW_SIZE);
check_cache_size(&trie_config, ShardId::new(0), true, S0_VIEW_SIZE);
check_cache_size(&trie_config, 0, ShardId::new(1), false, DEFAULT_SIZE);
check_cache_size(&trie_config, 0, ShardId::new(0), false, S0_SIZE);
check_cache_size(&trie_config, 0, ShardId::new(1), true, DEFAULT_VIEW_SIZE);
check_cache_size(&trie_config, 0, ShardId::new(0), true, S0_VIEW_SIZE);
}

/// The default store config must yield the per-account cache limits for the shards
/// that hold those accounts in shard layout version 3: shard 1 (50MB) and
/// shard 5 (3GB).
#[test]
fn test_default_per_shard_max_bytes() {
    let default_store_config = StoreConfig::default();
    let trie_config = TrieConfig::from_store_config(&default_store_config);

    // (shard id, expected non-view cache size) for shard layout version 3
    let expectations = [(1, bytesize::ByteSize::mb(50)), (5, bytesize::ByteSize::gb(3))];
    for (shard_id, expected_size) in expectations {
        check_cache_size(&trie_config, 3, ShardId::new(shard_id), false, expected_size);
    }
}

#[track_caller]
fn check_cache_size(
trie_config: &TrieConfig,
version: ShardVersion,
shard_id: ShardId,
is_view: bool,
expected_size: bytesize::ByteSize,
) {
let shard_uid = ShardUId::new(0, shard_id);
let shard_uid = ShardUId::new(version, shard_id);
let trie_cache = TrieCache::new(&trie_config, shard_uid, is_view);
assert_eq!(expected_size.as_u64(), trie_cache.lock().total_size_limit);
assert_eq!(is_view, trie_cache.lock().is_view);
Expand Down

0 comments on commit d81a85c

Please sign in to comment.