diff --git a/rs/canister_sandbox/replica_controller/src/lib.rs b/rs/canister_sandbox/replica_controller/src/lib.rs index 25713c9a235..2beac3a245a 100644 --- a/rs/canister_sandbox/replica_controller/src/lib.rs +++ b/rs/canister_sandbox/replica_controller/src/lib.rs @@ -3,4 +3,5 @@ pub mod controller_service_impl; pub mod launch_as_process; mod process_exe_and_args; pub mod process_os_metrics; +mod sandbox_process_eviction; pub mod sandboxed_execution_controller; diff --git a/rs/canister_sandbox/replica_controller/src/sandbox_process_eviction.rs b/rs/canister_sandbox/replica_controller/src/sandbox_process_eviction.rs new file mode 100644 index 00000000000..b06dc73a021 --- /dev/null +++ b/rs/canister_sandbox/replica_controller/src/sandbox_process_eviction.rs @@ -0,0 +1,144 @@ +use std::time::Instant; + +use ic_types::CanisterId; + +#[derive(Debug, Eq, PartialEq, Clone)] +pub(crate) struct EvictionCandidate { + pub id: CanisterId, + pub last_used: Instant, +} + +/// Evicts the least recently used candidates in order to bring the number of +/// the remaining candidates down to `max_count_threshold`. +/// +/// The function also tries to evict candidates that have been idle for a long +/// time (`last_used_threshold`) while keeping the number of the remaining +/// candidates at or above `min_count_threshold`. +/// +/// More formally: +/// 1. Sort the candidates in the order of increasing `last_used` field. +/// 2. Let `N` be the total number of candidates. +/// 3. Evict the first `K` candidates such that the number of remaining +/// candidates `N-K` is between the given thresholds: +/// - `min_count_threshold <= N-K <= max_count_threshold`. +/// - if there are multiple possible values for `K`, then choose the one that +/// evicts the most candidates with `last_used < last_used_threshold`. +/// 4. Return the evicted candidates. 
+pub(crate) fn evict( + mut candidates: Vec, + min_count_threshold: usize, + max_count_threshold: usize, + last_used_threshold: Instant, +) -> Vec { + candidates.sort_by_key(|x| x.last_used); + + let evict_at_least = candidates.len().saturating_sub(max_count_threshold); + let evict_at_most = candidates.len().saturating_sub(min_count_threshold); + + let mut evicted = vec![]; + + for candidate in candidates.into_iter() { + if evicted.len() >= evict_at_most { + // Cannot evict anymore because at least `min_count_threshold` + // should remain not evicted. + break; + } + if candidate.last_used >= last_used_threshold && evicted.len() >= evict_at_least { + // We have already evicted the minimum required number of candidates + // and all the remaining candidates were not idle the recent + // `last_used_threshold` time window. No need to evict more. + break; + } + evicted.push(candidate) + } + + evicted +} + +#[cfg(test)] +mod tests { + use std::time::{Duration, Instant}; + + use ic_test_utilities::types::ids::canister_test_id; + + use super::{evict, EvictionCandidate}; + + #[test] + fn evict_empty() { + assert_eq!(evict(vec![], 0, 0, Instant::now()), vec![],); + } + + #[test] + fn evict_nothing() { + let mut candidates = vec![]; + let now = Instant::now(); + for i in 0..10 { + candidates.push(EvictionCandidate { + id: canister_test_id(i), + last_used: now, + }); + } + assert_eq!(evict(candidates, 0, 10, now,), vec![],); + } + + #[test] + fn evict_due_to_process_count() { + let mut candidates = vec![]; + let now = Instant::now(); + for i in 0..100 { + candidates.push(EvictionCandidate { + id: canister_test_id(i), + last_used: now + Duration::from_secs(100 - i), + }); + } + assert_eq!( + evict(candidates.clone(), 0, 90, now,), + candidates.into_iter().rev().take(10).collect::>() + ); + } + + #[test] + fn evict_due_to_idle_time() { + let mut candidates = vec![]; + let now = Instant::now(); + for i in 0..100 { + candidates.push(EvictionCandidate { + id: canister_test_id(i), + 
last_used: now - Duration::from_secs(i), + }); + } + assert_eq!( + evict(candidates.clone(), 0, 100, now - Duration::from_secs(50)), + candidates.into_iter().rev().take(49).collect::>() + ); + } + + #[test] + fn evict_some_due_to_idle_time() { + let mut candidates = vec![]; + let now = Instant::now(); + for i in 0..100 { + candidates.push(EvictionCandidate { + id: canister_test_id(i), + last_used: now - Duration::from_secs(i + 1), + }); + } + assert_eq!( + evict(candidates.clone(), 10, 100, now), + candidates.into_iter().rev().take(90).collect::>() + ); + } + + #[test] + fn evict_all() { + let mut candidates = vec![]; + let now = Instant::now(); + for i in 0..100 { + candidates.push(EvictionCandidate { + id: canister_test_id(i), + last_used: now - Duration::from_secs(i + 1), + }); + } + assert_eq!(evict(candidates.clone(), 0, 100, now).len(), 100); + } +} diff --git a/rs/canister_sandbox/replica_controller/src/sandboxed_execution_controller.rs b/rs/canister_sandbox/replica_controller/src/sandboxed_execution_controller.rs index 98f7d1f8ac4..93cf42cd66d 100644 --- a/rs/canister_sandbox/replica_controller/src/sandboxed_execution_controller.rs +++ b/rs/canister_sandbox/replica_controller/src/sandboxed_execution_controller.rs @@ -37,7 +37,7 @@ use std::process::ExitStatus; use std::sync::Weak; use std::sync::{Arc, Mutex}; use std::thread; -use std::time::Duration; +use std::time::{Duration, Instant}; use crate::active_execution_state_registry::{ActiveExecutionStateRegistry, CompletionResult}; use crate::controller_service_impl::ControllerServiceImpl; @@ -45,10 +45,14 @@ use crate::launch_as_process::{create_sandbox_process, spawn_launcher_process}; use crate::process_exe_and_args::{create_launcher_argv, create_sandbox_argv}; #[cfg(target_os = "linux")] use crate::process_os_metrics; +use crate::sandbox_process_eviction::{self, EvictionCandidate}; -const SANDBOX_PROCESS_INACTIVE_TIME_BEFORE_EVICTION: Duration = Duration::from_secs(60); const 
SANDBOX_PROCESS_UPDATE_INTERVAL: Duration = Duration::from_secs(10); +// The percentage of sandbox processes to evict in one go in order to amortize +// for the eviction cost. +const SANDBOX_PROCESS_EVICTION_PERCENT: usize = 20; + const SANDBOXED_EXECUTION_INVALID_MEMORY_SIZE: &str = "sandboxed_execution_invalid_memory_size"; // Metric labels for the different outcomes of a wasm cache lookup. Stored in @@ -392,12 +396,12 @@ impl std::fmt::Debug for OpenedWasm { /// Manages the lifetime of a remote sandbox memory and provides its id. pub struct OpenedMemory { - sandbox_process: Arc, + sandbox_process: Weak, memory_id: MemoryId, } impl OpenedMemory { - fn new(sandbox_process: Arc, memory_id: MemoryId) -> Self { + fn new(sandbox_process: Weak, memory_id: MemoryId) -> Self { Self { sandbox_process, memory_id, @@ -406,22 +410,28 @@ impl OpenedMemory { } impl SandboxMemoryOwner for OpenedMemory { - fn get_id(&self) -> usize { + fn get_sandbox_memory_id(&self) -> usize { self.memory_id.as_usize() } + + fn get_sandbox_process_id(&self) -> Option { + self.sandbox_process.upgrade().map(|sp| sp.pid as usize) + } } impl Drop for OpenedMemory { fn drop(&mut self) { - self.sandbox_process - .history - .record(format!("CloseMemory(memory_id={})", self.memory_id)); - self.sandbox_process - .sandbox_service - .close_memory(protocol::sbxsvc::CloseMemoryRequest { - memory_id: self.memory_id, - }) - .on_completion(|_| {}); + if let Some(sandbox_process) = self.sandbox_process.upgrade() { + sandbox_process + .history + .record(format!("CloseMemory(memory_id={})", self.memory_id)); + sandbox_process + .sandbox_service + .close_memory(protocol::sbxsvc::CloseMemoryRequest { + memory_id: self.memory_id, + }) + .on_completion(|_| {}); + } } } @@ -435,26 +445,32 @@ impl std::fmt::Debug for OpenedMemory { enum Backend { Active { + // The strong reference to `SandboxProcess` ensures that the sandbox + // process will stay alive even if it is not used. 
sandbox_process: Arc, - last_used: std::time::Instant, + stats: SandboxProcessStats, }, Evicted { + // The weak reference is used to promote the sandbox process back to + // `active` if a new message execution starts. sandbox_process: Weak, - last_used: std::time::Instant, + stats: SandboxProcessStats, }, + // A dummy, not observable state that is used as a placeholder in + // `std::mem::replace()`. Empty, } +#[derive(Clone)] +struct SandboxProcessStats { + last_used: std::time::Instant, +} + enum SandboxProcessStatus { Active, Evicted, } -struct SandboxProcessStats { - time_since_last_usage: std::time::Duration, - status: SandboxProcessStatus, -} - // Represent a paused sandbox execution. struct PausedSandboxExecution { canister_id: CanisterId, @@ -534,7 +550,33 @@ impl PausedWasmExecution for PausedSandboxExecution { /// Manages sandboxed processes, forwards requests to the appropriate /// process. pub struct SandboxedExecutionController { + /// A registry of known sandbox processes. Each sandbox process can be in + /// one of two states: + /// + /// - `active`: the entry in the registry keeps a strong reference to the + /// sandbox process, so that it is guaranteed to stay alive. + /// + /// - `evicted`: the entry in the registry keeps a weak reference to the + /// sandbox process, so that the sandbox process is terminated as soon as + /// the last strong reference to it is dropped. In other words, the sandbox + /// process is terminated as soon as all pending executions finish and no + /// new execution starts. + /// + /// The sandbox process can move from `evicted` back to `active` if a new + /// message execution starts. + /// + /// Invariants: + /// + /// - If a sandbox process has a strong reference from somewhere else in the + /// replica process, then the registry has an entry for that sandbox process. + /// The entry may be either the `active` or `evicted` state. 
+ /// + /// - An entry is removed from the registry only if it is in the `evicted` + /// state and the strong reference count reaches zero. backends: Arc>>, + min_sandbox_count: usize, + max_sandbox_count: usize, + max_sandbox_idle_time: Duration, logger: ReplicaLogger, /// Executable and arguments to be passed to `canister_sandbox` which are /// the same for all canisters. @@ -612,12 +654,12 @@ impl WasmExecutor for SandboxedExecutionController { // Now set up resources on the sandbox to drive the execution. let wasm_memory_handle = open_remote_memory(&sandbox_process, &execution_state.wasm_memory); let canister_id = sandbox_safe_system_state.canister_id(); - let wasm_memory_id = MemoryId::from(wasm_memory_handle.get_id()); + let wasm_memory_id = MemoryId::from(wasm_memory_handle.get_sandbox_memory_id()); let next_wasm_memory_id = MemoryId::new(); let stable_memory_handle = open_remote_memory(&sandbox_process, &execution_state.stable_memory); - let stable_memory_id = MemoryId::from(stable_memory_handle.get_id()); + let stable_memory_id = MemoryId::from(stable_memory_handle.get_sandbox_memory_id()); let next_stable_memory_id = MemoryId::new(); sandbox_process.history.record( @@ -881,6 +923,9 @@ impl SandboxedExecutionController { embedder_config: &EmbeddersConfig, ) -> std::io::Result { let launcher_exec_argv = create_launcher_argv().expect("No sandbox_launcher binary found"); + let min_sandbox_count = embedder_config.min_sandbox_count; + let max_sandbox_count = embedder_config.max_sandbox_count; + let max_sandbox_idle_time = embedder_config.max_sandbox_idle_time; let sandbox_exec_argv = create_sandbox_argv(embedder_config).expect("No canister_sandbox binary found"); let backends = Arc::new(Mutex::new(HashMap::new())); @@ -895,6 +940,9 @@ impl SandboxedExecutionController { logger_copy, backends_copy, metrics_copy, + min_sandbox_count, + max_sandbox_count, + max_sandbox_idle_time, ); }); @@ -920,6 +968,9 @@ impl SandboxedExecutionController { Ok(Self { backends, + 
min_sandbox_count, + max_sandbox_count, + max_sandbox_idle_time, logger, sandbox_exec_argv, metrics, @@ -935,18 +986,22 @@ impl SandboxedExecutionController { #[allow(unused_variables)] logger: ReplicaLogger, backends: Arc>>, metrics: Arc, + min_sandbox_count: usize, + max_sandbox_count: usize, + max_sandbox_idle_time: Duration, ) { loop { - let sandbox_processes = scavenge_sandbox_processes(&backends); + let sandbox_processes = get_sandbox_process_stats(&backends); #[cfg(target_os = "linux")] { let mut total_anon_rss: u64 = 0; let mut total_memfd_rss: u64 = 0; + let now = std::time::Instant::now(); // For all processes requested, get their memory usage and report // it keyed by pid. Ignore processes failures to get - for (sandbox_process, stats) in &sandbox_processes { + for (sandbox_process, stats, status) in &sandbox_processes { let pid = sandbox_process.pid; let mut process_rss = 0; if let Ok(kib) = process_os_metrics::get_anon_rss(pid) { @@ -970,16 +1025,19 @@ impl SandboxedExecutionController { metrics .sandboxed_execution_subprocess_rss .observe(process_rss as f64); - match stats.status { + let time_since_last_usage = now + .checked_duration_since(stats.last_used) + .unwrap_or_else(|| std::time::Duration::from_secs(0)); + match status { SandboxProcessStatus::Active => { metrics .sandboxed_execution_subprocess_active_last_used - .observe(stats.time_since_last_usage.as_secs_f64()); + .observe(time_since_last_usage.as_secs_f64()); } SandboxProcessStatus::Evicted => { metrics .sandboxed_execution_subprocess_evicted_last_used - .observe(stats.time_since_last_usage.as_secs_f64()); + .observe(time_since_last_usage.as_secs_f64()); } } } @@ -998,29 +1056,43 @@ impl SandboxedExecutionController { // on macos anyway. #[cfg(not(target_os = "linux"))] { + let now = std::time::Instant::now(); // For all processes requested, get their memory usage and report // it keyed by pid. 
Ignore processes failures to get - for (_sandbox_process, stats) in &sandbox_processes { - match stats.status { + for (_sandbox_process, stats, status) in &sandbox_processes { + let time_since_last_usage = now + .checked_duration_since(stats.last_used) + .unwrap_or_else(|| std::time::Duration::from_secs(0)); + match status { SandboxProcessStatus::Active => { metrics .sandboxed_execution_subprocess_active_last_used - .observe(stats.time_since_last_usage.as_secs_f64()); + .observe(time_since_last_usage.as_secs_f64()); } SandboxProcessStatus::Evicted => { metrics .sandboxed_execution_subprocess_evicted_last_used - .observe(stats.time_since_last_usage.as_secs_f64()); + .observe(time_since_last_usage.as_secs_f64()); } } } } - // Scavenge and collect metrics sufficiently infrequently that it - // does not use excessive compute resources. It might be sensible to - // scale this based on the time measured to perform the collection - // and e.g. ensure that we are 99% idle instead of using a static - // duration here. + { + let mut guard = backends.lock().unwrap(); + evict_sandbox_processes( + &mut guard, + min_sandbox_count, + max_sandbox_count, + max_sandbox_idle_time, + ); + } + + // Collect metrics sufficiently infrequently that it does not use + // excessive compute resources. It might be sensible to scale this + // based on the time measured to perform the collection and e.g. + // ensure that we are 99% idle instead of using a static duration + // here. std::thread::sleep(SANDBOX_PROCESS_UPDATE_INTERVAL); } } @@ -1030,26 +1102,28 @@ impl SandboxedExecutionController { if let Some(backend) = (*guard).get_mut(&canister_id) { let old = std::mem::replace(backend, Backend::Empty); - let sandbox_process = match old { + let sandbox_process_and_stats = match old { Backend::Active { - sandbox_process, .. - } => Some(sandbox_process), + sandbox_process, + stats, + } => Some((sandbox_process, stats)), Backend::Evicted { - sandbox_process, .. 
- } => sandbox_process.upgrade(), + sandbox_process, + stats, + } => sandbox_process.upgrade().map(|p| (p, stats)), Backend::Empty => None, }; - if let Some(sandbox_process) = sandbox_process { + if let Some((sandbox_process, _stats)) = sandbox_process_and_stats { let now = std::time::Instant::now(); - if SANDBOX_PROCESS_INACTIVE_TIME_BEFORE_EVICTION.as_secs() > 0 { + if self.max_sandbox_count > 0 { *backend = Backend::Active { sandbox_process: Arc::clone(&sandbox_process), - last_used: now, + stats: SandboxProcessStats { last_used: now }, }; } else { *backend = Backend::Evicted { sandbox_process: Arc::downgrade(&sandbox_process), - last_used: now, + stats: SandboxProcessStats { last_used: now }, }; } return sandbox_process; @@ -1057,6 +1131,17 @@ impl SandboxedExecutionController { } let _timer = self.metrics.sandboxed_execution_spawn_process.start_timer(); + if guard.len() > self.max_sandbox_count { + let to_evict = self.max_sandbox_count * SANDBOX_PROCESS_EVICTION_PERCENT / 100; + let max_active_sandboxes = self.max_sandbox_count.saturating_sub(to_evict); + evict_sandbox_processes( + &mut guard, + self.min_sandbox_count, + max_active_sandboxes, + self.max_sandbox_idle_time, + ); + } + // No sandbox process found for this canister. Start a new one and register it. 
let reg = Arc::new(ActiveExecutionStateRegistry::new()); let controller_service = ControllerServiceImpl::new(Arc::clone(&reg), self.logger.clone()); @@ -1079,7 +1164,7 @@ let now = std::time::Instant::now(); let backend = Backend::Active { sandbox_process: Arc::clone(&sandbox_process), - last_used: now, + stats: SandboxProcessStats { last_used: now }, }; (*guard).insert(canister_id, backend); @@ -1339,113 +1424,146 @@ fn open_remote_memory( memory: &Memory, ) -> SandboxMemoryHandle { let mut guard = memory.sandbox_memory.lock().unwrap(); - match &*guard { - SandboxMemory::Synced(id) => id.clone(), - SandboxMemory::Unsynced => { - let serialized_page_map = memory.page_map.serialize(); - // Only clean memory without any dirty pages can be unsynced. - // That is because all dirty pages are created by the sandbox and - // they are automatically synced using `wrap_remote_memory`. - assert!(serialized_page_map.page_delta.is_empty()); - assert!(serialized_page_map.round_delta.is_empty()); - let serialized_memory = MemorySerialization { - page_map: serialized_page_map, - num_wasm_pages: memory.size, - }; - let memory_id = MemoryId::new(); - sandbox_process - .history - .record(format!("OpenMemory(memory_id={})", memory_id)); - sandbox_process - .sandbox_service - .open_memory(protocol::sbxsvc::OpenMemoryRequest { - memory_id, - memory: serialized_memory, - }) - .on_completion(|_| {}); - let handle = wrap_remote_memory(sandbox_process, memory_id); - *guard = SandboxMemory::Synced(handle.clone()); - handle + if let SandboxMemory::Synced(id) = &*guard { + if let Some(pid) = id.get_sandbox_process_id() { + // There is at most one sandbox process per canister at any time. 
+ assert_eq!(pid, sandbox_process.pid as usize); + return id.clone(); + } + } + + // Here we have two cases: + // 1) either the memory was never synchronized with any sandbox process, + // 2) or the memory was synchronized with some sandbox process that got evicted + // and terminated in the meantime. + // In both cases, we need to synchronize the memory with the given sandbox + // process. + + let serialized_page_map = memory.page_map.serialize(); + let serialized_memory = MemorySerialization { + page_map: serialized_page_map, + num_wasm_pages: memory.size, + }; + let memory_id = MemoryId::new(); + sandbox_process + .history + .record(format!("OpenMemory(memory_id={})", memory_id)); + sandbox_process + .sandbox_service + .open_memory(protocol::sbxsvc::OpenMemoryRequest { + memory_id, + memory: serialized_memory, + }) + .on_completion(|_| {}); + let handle = wrap_remote_memory(sandbox_process, memory_id); + *guard = SandboxMemory::Synced(handle.clone()); + handle } fn wrap_remote_memory( sandbox_process: &Arc, memory_id: MemoryId, ) -> SandboxMemoryHandle { - let opened_memory = OpenedMemory::new(Arc::clone(sandbox_process), memory_id); + let opened_memory = OpenedMemory::new(Arc::downgrade(sandbox_process), memory_id); SandboxMemoryHandle::new(Arc::new(opened_memory)) } -// Evicts inactive process and returns all processes that are still alive. -fn scavenge_sandbox_processes( +// Evicts some sandbox process backends according to the heuristics of the +// `sandbox_process_eviction::evict()` function. See the comments of that +// function for the explanation of the threshold parameters. +fn evict_sandbox_processes( + backends: &mut HashMap, + min_active_sandboxes: usize, + max_active_sandboxes: usize, + max_sandbox_idle_time: Duration, +) { + // Remove the already terminated processes. + backends.retain(|_id, backend| match backend { + Backend::Active { .. } => true, + Backend::Evicted { + sandbox_process, .. 
+ } => { + // Once `strong_count` reaches zero, then `upgrade()` will always + // return `None`. This means that such entries will never be used again, + // so it is safe to remove them from the hash map. + sandbox_process.strong_count() > 0 + } + Backend::Empty => false, + }); + + let candidates: Vec<_> = backends + .iter() + .filter_map(|(id, backend)| match backend { + Backend::Active { stats, .. } => Some(EvictionCandidate { + id: *id, + last_used: stats.last_used, + }), + Backend::Evicted { .. } | Backend::Empty => None, + }) + .collect(); + + let evicted = sandbox_process_eviction::evict( + candidates, + min_active_sandboxes, + max_active_sandboxes, + Instant::now() - max_sandbox_idle_time, + ); + + // Actually evict all the selected eviction candidates. + for EvictionCandidate { id, .. } in evicted.iter() { + if let Some(backend) = backends.get_mut(id) { + let old = std::mem::replace(backend, Backend::Empty); + let new = match old { + Backend::Active { + sandbox_process, + stats, + } => Backend::Evicted { + sandbox_process: Arc::downgrade(&sandbox_process), + stats, + }, + Backend::Evicted { .. } | Backend::Empty => old, + }; + *backend = new; + } + } +} + +// Returns all processes that are still alive. 
+fn get_sandbox_process_stats( backends: &Arc>>, -) -> Vec<(Arc, SandboxProcessStats)> { - let mut guard = backends.lock().unwrap(); - let now = std::time::Instant::now(); +) -> Vec<( + Arc, + SandboxProcessStats, + SandboxProcessStatus, +)> { + let guard = backends.lock().unwrap(); let mut result = vec![]; - for backend in guard.values_mut() { - let old = std::mem::replace(backend, Backend::Empty); - let new = match old { + for backend in guard.values() { + match backend { Backend::Active { sandbox_process, - last_used, + stats, } => { - let inactive_time = now - .checked_duration_since(last_used) - .unwrap_or_else(|| std::time::Duration::from_secs(0)); - if inactive_time > SANDBOX_PROCESS_INACTIVE_TIME_BEFORE_EVICTION { - result.push(( - Arc::clone(&sandbox_process), - SandboxProcessStats { - time_since_last_usage: inactive_time, - status: SandboxProcessStatus::Evicted, - }, - )); - Backend::Evicted { - sandbox_process: Arc::downgrade(&sandbox_process), - last_used, - } - } else { - result.push(( - Arc::clone(&sandbox_process), - SandboxProcessStats { - time_since_last_usage: inactive_time, - status: SandboxProcessStatus::Active, - }, - )); - Backend::Active { - sandbox_process, - last_used, - } - } + result.push(( + Arc::clone(sandbox_process), + stats.clone(), + SandboxProcessStatus::Active, + )); } Backend::Evicted { sandbox_process, - last_used, - } => match sandbox_process.upgrade() { - Some(strong_reference) => { - let inactive_time = now - .checked_duration_since(last_used) - .unwrap_or_else(|| std::time::Duration::from_secs(0)); + stats, + } => { + if let Some(strong_reference) = sandbox_process.upgrade() { result.push(( strong_reference, - SandboxProcessStats { - time_since_last_usage: inactive_time, - status: SandboxProcessStatus::Evicted, - }, + stats.clone(), + SandboxProcessStatus::Evicted, )); - Backend::Evicted { - sandbox_process, - last_used, - } } - None => Backend::Empty, - }, - Backend::Empty => Backend::Empty, + } + Backend::Empty => {} }; - 
*backend = new; } result } diff --git a/rs/config/src/embedders.rs b/rs/config/src/embedders.rs index 325aff488a4..3f93fcd9498 100644 --- a/rs/config/src/embedders.rs +++ b/rs/config/src/embedders.rs @@ -1,3 +1,5 @@ +use std::time::Duration; + use ic_base_types::NumBytes; use ic_sys::PAGE_SIZE; use ic_types::{NumInstructions, NumPages}; @@ -29,6 +31,18 @@ pub(crate) const DEFAULT_COST_TO_COMPILE_WASM_INSTRUCTION: NumInstructions = /// The number of rayon threads used by wasmtime to compile wasm binaries const DEFAULT_WASMTIME_RAYON_COMPILATION_THREADS: usize = 10; +/// Sandbox process eviction does not activate if the number of sandbox +/// processes is below this threshold. +pub(crate) const DEFAULT_MIN_SANDBOX_COUNT: usize = 500; + +/// Sandbox process eviction ensures that the number of sandbox processes is +/// always below this threshold. +pub(crate) const DEFAULT_MAX_SANDBOX_COUNT: usize = 2_000; + +/// A sandbox process may be evicted after it has been idle for this +/// duration and sandbox process eviction is activated. +pub(crate) const DEFAULT_MAX_SANDBOX_IDLE_TIME: Duration = Duration::from_secs(30 * 60); + #[allow(non_upper_case_globals)] const KiB: u64 = 1024; #[allow(non_upper_case_globals)] @@ -83,6 +97,18 @@ pub struct Config { // Maximum number of stable memory dirty pages that a single message execution // is allowed to produce. pub stable_memory_dirty_page_limit: NumPages, + + /// Sandbox process eviction does not activate if the number of sandbox + /// processes is below this threshold. + pub min_sandbox_count: usize, + + /// Sandbox process eviction ensures that the number of sandbox processes is + /// always below this threshold. + pub max_sandbox_count: usize, + + /// A sandbox process may be evicted after it has been idle for this + /// duration and sandbox process eviction is activated. 
+ pub max_sandbox_idle_time: Duration, } impl Config { @@ -98,6 +124,9 @@ impl Config { num_rayon_compilation_threads: DEFAULT_WASMTIME_RAYON_COMPILATION_THREADS, feature_flags: FeatureFlags::default(), stable_memory_dirty_page_limit: NumPages::from(STABLE_MEMORY_DIRTY_PAGE_LIMIT), + min_sandbox_count: DEFAULT_MIN_SANDBOX_COUNT, + max_sandbox_count: DEFAULT_MAX_SANDBOX_COUNT, + max_sandbox_idle_time: DEFAULT_MAX_SANDBOX_IDLE_TIME, } } } diff --git a/rs/config/src/execution_environment.rs b/rs/config/src/execution_environment.rs index 5aa727e79e3..4aab9f71591 100644 --- a/rs/config/src/execution_environment.rs +++ b/rs/config/src/execution_environment.rs @@ -8,7 +8,7 @@ use ic_types::{ Cycles, NumBytes, NumInstructions, MAX_STABLE_MEMORY_IN_BYTES, MAX_WASM_MEMORY_IN_BYTES, }; use serde::{Deserialize, Serialize}; -use std::str::FromStr; +use std::{str::FromStr, time::Duration}; const GB: u64 = 1024 * 1024 * 1024; @@ -155,6 +155,18 @@ pub struct Config { /// Indicates whether composite queries are available or not. pub composite_queries: FlagStatus, + + /// Sandbox process eviction does not activate if the number of sandbox + /// processes is below this threshold. + pub min_sandbox_count: usize, + + /// Sandbox process eviction ensures that the number of sandbox processes is + /// always below this threshold. + pub max_sandbox_count: usize, + + /// A sandbox process may be evicted after it has been idle for this + /// duration and sandbox process eviction is activated. 
+ pub max_sandbox_idle_time: Duration, } impl Default for Config { @@ -211,6 +223,9 @@ impl Default for Config { mainnet_canister_id: Some(bitcoin_mainnet_canister_id), }, composite_queries: FlagStatus::Disabled, + min_sandbox_count: embedders::DEFAULT_MIN_SANDBOX_COUNT, + max_sandbox_count: embedders::DEFAULT_MAX_SANDBOX_COUNT, + max_sandbox_idle_time: embedders::DEFAULT_MAX_SANDBOX_IDLE_TIME, } } } diff --git a/rs/execution_environment/src/hypervisor.rs b/rs/execution_environment/src/hypervisor.rs index 1d8bc5f2efc..53cf3eb5871 100644 --- a/rs/execution_environment/src/hypervisor.rs +++ b/rs/execution_environment/src/hypervisor.rs @@ -230,6 +230,8 @@ impl Hypervisor { embedder_config.feature_flags.rate_limiting_of_debug_prints = config.rate_limiting_of_debug_prints; embedder_config.cost_to_compile_wasm_instruction = config.cost_to_compile_wasm_instruction; + embedder_config.max_sandbox_count = config.max_sandbox_count; + embedder_config.max_sandbox_idle_time = config.max_sandbox_idle_time; let wasm_executor: Arc = match config.canister_sandboxing_flag { FlagStatus::Enabled => { diff --git a/rs/replicated_state/src/canister_state/execution_state.rs b/rs/replicated_state/src/canister_state/execution_state.rs index 833277f604d..685a7e30861 100644 --- a/rs/replicated_state/src/canister_state/execution_state.rs +++ b/rs/replicated_state/src/canister_state/execution_state.rs @@ -292,7 +292,8 @@ impl SandboxMemory { /// The owner of the sandbox memory. It's destructor must close the /// corresponding memory in the sandbox process. pub trait SandboxMemoryOwner: std::fmt::Debug + Send + Sync { - fn get_id(&self) -> usize; + fn get_sandbox_memory_id(&self) -> usize; + fn get_sandbox_process_id(&self) -> Option; } /// A handle to the sandbox memory that keeps the corresponding memory in the @@ -314,8 +315,14 @@ impl SandboxMemoryHandle { /// Returns a raw id of the memory in the sandbox process, which can be /// converted to sandbox `MemoryId` using `MemoryId::from()`. 
- pub fn get_id(&self) -> usize { - self.0.get_id() + pub fn get_sandbox_memory_id(&self) -> usize { + self.0.get_sandbox_memory_id() + } + + /// Returns the id of the sandbox process if the process is still running. + /// Returns `None` if the sandbox process has exited. + pub fn get_sandbox_process_id(&self) -> Option { + self.0.get_sandbox_process_id() } } /// The part of the canister state that can be accessed during execution diff --git a/rs/replicated_state/src/page_map/page_allocator/mmap.rs b/rs/replicated_state/src/page_map/page_allocator/mmap.rs index a30f0a77bb6..ac3b206f582 100644 --- a/rs/replicated_state/src/page_map/page_allocator/mmap.rs +++ b/rs/replicated_state/src/page_map/page_allocator/mmap.rs @@ -438,19 +438,24 @@ impl MmapBasedPageAllocatorCore { file_descriptor: FileDescriptor, backing_file_owner: BackingFileOwner, ) -> Self { - // SAFETY: The file descriptor is valid. - let file_len = unsafe { get_file_length(file_descriptor.fd) }; - Self { + let mut page_allocator = Self { id, allocation_area: Default::default(), allocated_pages: 0, deserialized_pages: 0, file_descriptor: file_descriptor.fd, - file_len, + file_len: 0, chunks: vec![], dropped_pages: vec![], backing_file_owner, - } + }; + // SAFETY: The file descriptor is valid. + let file_len = unsafe { get_file_length(file_descriptor.fd) }; + // Depending on how this page allocator is used, the existing pages in + // the file may be deserialized later on. We need to prepare for that + // potential deserialization. + page_allocator.grow_for_deserialization(file_len); + page_allocator } fn allocate_page(&mut self, page_allocator: &Arc) -> PageInner {