From f0e618f45ec111f77ff5464c44b98172aab8bac9 Mon Sep 17 00:00:00 2001 From: Scott Prutton Date: Thu, 18 Jan 2024 16:12:47 -0500 Subject: [PATCH] feat: add PoolNoodle to manage firecracker jails --- Cargo.lock | 13 +- bin/veritech/scripts/stop.sh | 7 +- lib/deadpool-cyclone/src/instance.rs | 10 +- .../src/instance/cyclone/firecracker-setup.sh | 28 +- .../src/instance/cyclone/local_http.rs | 12 +- .../src/instance/cyclone/local_uds.rs | 57 +++-- lib/deadpool-cyclone/src/lib.rs | 51 +++- lib/deadpool-cyclone/src/pool_noodle.rs | 2 + .../src/pool_noodle/pool_noodle.rs | 240 ++++++++++++++++++ lib/veritech-server/src/server.rs | 2 +- third-party/rust/BUCK | 57 ++--- third-party/rust/Cargo.lock | 13 +- 12 files changed, 389 insertions(+), 103 deletions(-) create mode 100644 lib/deadpool-cyclone/src/pool_noodle.rs create mode 100644 lib/deadpool-cyclone/src/pool_noodle/pool_noodle.rs diff --git a/Cargo.lock b/Cargo.lock index dbf2fe2728..6a00a6753c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -94,9 +94,9 @@ dependencies = [ [[package]] name = "anstream" -version = "0.6.9" +version = "0.6.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3fde6067df7359f2d6335ec1a50c1f8f825801687d10da0cc4c6b08e3f6afd15" +checksum = "6e2e1ebcb11de5c03c67de28a7df593d32191b44939c482e97702baaaa6ab6a5" dependencies = [ "anstyle", "anstyle-parse", @@ -5125,9 +5125,9 @@ dependencies = [ [[package]] name = "smallvec" -version = "1.12.0" +version = "1.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2593d31f82ead8df961d8bd23a64c2ccf2eb5dd34b0a34bfb4dd54011c72009e" +checksum = "3b187f0231d56fe41bfb12034819dd2bf336422a5866de41bc3fec4b2e3883e8" [[package]] name = "socket2" @@ -5818,10 +5818,9 @@ dependencies = [ [[package]] name = "tokio-postgres-rustls" -version = "0.11.0" -source = "git+https://github.com/jbg/tokio-postgres-rustls.git?branch=master#f210aa24c4788e7f3bd9af52c4f7014c5561ac70" +version = "0.11.1" +source = "git+https://github.com/jbg/tokio-postgres-rustls.git?branch=master#b8de7acc8067a3ec4b7e99ba6ea654fae8e28fe4" dependencies = [ - "futures", "ring", "rustls 0.22.2", "tokio", diff --git a/bin/veritech/scripts/stop.sh b/bin/veritech/scripts/stop.sh index 3b4d71d5ab..d5ea6dd4ef 100755 --- a/bin/veritech/scripts/stop.sh +++ b/bin/veritech/scripts/stop.sh @@ -30,7 +30,12 @@ userdel jailer-$SB_ID || true # Remove directories and files umount /srv/jailer/firecracker/$SB_ID/root/rootfs.ext4 || true -dmsetup remove rootfs-overlay-$SB_ID || true +until ! mountpoint -q /srv/jailer/firecracker/$SB_ID/root/rootfs.ext4 +do + echo "waiting for unmount" + sleep .1 +done +dmsetup remove --retry rootfs-overlay-$SB_ID # We are not currently creating these. # umount /srv/jailer/firecracker/$SB_ID/root/image-kernel.bin || true diff --git a/lib/deadpool-cyclone/src/instance.rs b/lib/deadpool-cyclone/src/instance.rs index e64f562c0e..d7d8526a15 100644 --- a/lib/deadpool-cyclone/src/instance.rs +++ b/lib/deadpool-cyclone/src/instance.rs @@ -79,7 +79,12 @@ pub trait Spec { /// # }); /// # Ok::<(), SpawnError>(()) /// ``` - async fn spawn(&self) -> result::Result; + async fn spawn(&self, id: u32) -> result::Result; + + /// whether to enable pool_noodle when using this spec + fn use_pool_noodle(&self) -> bool; + /// the size of the pool + fn pool_size(&self) -> u16; } /// A type which implements the [Builder pattern] and builds a [`Spec`]. @@ -311,6 +316,9 @@ pub trait Instance { /// # Ok::<(), TerminationError>(()) /// ``` async fn terminate(&mut self) -> result::Result<(), Self::Error>; + + /// Get the id of the underlying child runtime + fn id(&self) -> u32; } // async fn spawn(builder: &B) -> Result, E> diff --git a/lib/deadpool-cyclone/src/instance/cyclone/firecracker-setup.sh b/lib/deadpool-cyclone/src/instance/cyclone/firecracker-setup.sh index ea5f451701..ef6fa9fcaf 100755 --- a/lib/deadpool-cyclone/src/instance/cyclone/firecracker-setup.sh +++ b/lib/deadpool-cyclone/src/instance/cyclone/firecracker-setup.sh @@ -280,7 +280,7 @@ execute_cleanup() { } -prepare_jailers() { +clean_jails() { ITERATIONS="${1:-100}" # Default to 100 jails DOWNLOAD_ROOTFS="${2:-false}" # Default to false DOWNLOAD_KERNEL="${3:-false}" # Default to false @@ -302,32 +302,10 @@ prepare_jailers() { fi /firecracker-data/stop.sh $iter &> /dev/null & done + wait echo echo "Elapsed: $(($SECONDS / 3600))hrs $((($SECONDS / 60) % 60))min $(($SECONDS % 60))sec" fi - - if test -f "/firecracker-data/prepare_jailer.sh"; then - IN_PARALLEL=1 - SECONDS=0 - for (( iter=0; iter<$ITERATIONS; iter++ )) - do - echo -ne "Validating jail $(($iter + 1 )) out of $ITERATIONS ... \r" - # this ensures we only run n jobs in parallel at a time to avoid - # process locks. This is an unreliable hack. - # TODO(scott): we need to walk through the processes called in this script - # and understand where locking could occur. Parallelization can be - # dangerous here, but testing implies that it works. - if [ $(jobs -r | wc -l) -ge $IN_PARALLEL ]; then - wait $(jobs -r -p | head -1) - fi - /firecracker-data/prepare_jailer.sh $iter & - done - echo - echo "Elapsed: $(($SECONDS / 3600))hrs $((($SECONDS / 60) % 60))min $(($SECONDS % 60))sec" - else - echo "prepare_jailer.sh script not found, skipping jail creation." - exit 1 - fi } # ----------------------------------------- @@ -394,5 +372,5 @@ check_params_set && echo -e "Installation Values found to be:\n - $VARIABLES_FIL check_os_release && echo -e "Operating System found to be:\n - $OS_VARIANT" install_pre_reqs execute_configuration_management $DOWNLOAD_ROOTFS $DOWNLOAD_KERNEL -prepare_jailers $JAILS_TO_CREATE $DOWNLOAD_ROOTFS $DOWNLOAD_KERNEL $FORCE_CLEAN_JAILS +clean_jails $JAILS_TO_CREATE $DOWNLOAD_ROOTFS $DOWNLOAD_KERNEL $FORCE_CLEAN_JAILS execute_cleanup diff --git a/lib/deadpool-cyclone/src/instance/cyclone/local_http.rs b/lib/deadpool-cyclone/src/instance/cyclone/local_http.rs index d9efdaaa59..2f2947f865 100644 --- a/lib/deadpool-cyclone/src/instance/cyclone/local_http.rs +++ b/lib/deadpool-cyclone/src/instance/cyclone/local_http.rs @@ -97,6 +97,9 @@ impl Instance for LocalHttpInstance { Ok(()) } + fn id(&self) -> u32 { + 0 + } } #[async_trait] @@ -293,10 +296,17 @@ impl Spec for LocalHttpInstanceSpec { type Instance = LocalHttpInstance; type Error = LocalHttpInstanceError; + fn use_pool_noodle(&self) -> bool { + false + } + fn pool_size(&self) -> u16 { + 0 + } + async fn setup(&self) -> result::Result<(), Self::Error> { Ok(()) } - async fn spawn(&self) -> result::Result { + async fn spawn(&self, _id: u32) -> result::Result { let socket_addr = socket_addr_from(&self.socket_strategy).await?; let mut cmd = self.build_command(&socket_addr); diff --git a/lib/deadpool-cyclone/src/instance/cyclone/local_uds.rs b/lib/deadpool-cyclone/src/instance/cyclone/local_uds.rs index 80cbfcd5d1..0e8df7ba77 100644 --- a/lib/deadpool-cyclone/src/instance/cyclone/local_uds.rs +++ b/lib/deadpool-cyclone/src/instance/cyclone/local_uds.rs @@ -140,6 +140,9 @@ impl Instance for LocalUdsInstance { Ok(()) } + fn id(&self) -> u32 { + self.runtime.id() + } } #[async_trait] @@ -345,6 +348,16 @@ impl Spec for LocalUdsInstanceSpec { type Instance = LocalUdsInstance; type Error = LocalUdsInstanceError; + fn use_pool_noodle(&self) -> bool { + matches!( + self.runtime_strategy, + LocalUdsRuntimeStrategy::LocalFirecracker + ) + } + fn pool_size(&self) -> u16 { + self.pool_size + } + async fn setup(&self) -> result::Result<(), Self::Error> { match self.runtime_strategy { LocalUdsRuntimeStrategy::LocalDocker => Ok(()), @@ -353,9 +366,9 @@ impl Spec for LocalUdsInstanceSpec { } } - async fn spawn(&self) -> result::Result { + async fn spawn(&self, id: u32) -> result::Result { let (temp_path, socket) = temp_path_and_socket_from(&self.socket_strategy)?; - let mut runtime = runtime_instance_from_spec(self, &socket).await?; + let mut runtime = runtime_instance_from_spec(self, &socket, id).await?; runtime.spawn().await?; //TODO(scott): Firecracker requires the client to add a special connection detail. We @@ -571,6 +584,7 @@ impl Default for LocalUdsRuntimeStrategy { #[async_trait] pub trait LocalInstanceRuntime: Send + Sync { + fn id(&self) -> u32; fn socket(&mut self) -> PathBuf; async fn spawn(&mut self) -> result::Result<(), LocalUdsInstanceError>; async fn terminate(&mut self) -> result::Result<(), LocalUdsInstanceError>; @@ -623,6 +637,9 @@ impl LocalProcessRuntime { #[async_trait] impl LocalInstanceRuntime for LocalProcessRuntime { + fn id(&self) -> u32 { + 0 + } fn socket(&mut self) -> PathBuf { self.socket.to_path_buf() } @@ -766,6 +783,9 @@ impl LocalDockerRuntime { #[async_trait] impl LocalInstanceRuntime for LocalDockerRuntime { + fn id(&self) -> u32 { + 0 + } fn socket(&mut self) -> PathBuf { self.socket.to_path_buf() } @@ -798,22 +818,13 @@ impl LocalInstanceRuntime for LocalDockerRuntime { struct LocalFirecrackerRuntime { cmd: Command, child: Option, + vm_id: u32, socket: PathBuf, } impl LocalFirecrackerRuntime { - async fn build() -> Result> { - // todo(scott): the pid is a naive method for removing serially creating jails. - // we read the pid, use that as the vm_id, and then increment it. - // There are obvious contention problems here, but we should address - // this within deadpool itself. - let pid = Path::new("/firecracker-data/pid"); - let vm_id = &std::fs::read_to_string(pid) - .map_err(LocalUdsInstanceError::FirecrackerPidRead)? - .parse::() - .map_err(LocalUdsInstanceError::FirecrackerPidParse)?; - std::fs::write(pid, format!("{}", vm_id + 1)) - .map_err(LocalUdsInstanceError::FirecrackerPidWrite)?; + async fn build(id: u32) -> Result> { + let vm_id = id; let mut cmd = Command::new("/usr/bin/jailer"); cmd.arg("--cgroup-version") @@ -836,6 +847,7 @@ impl LocalFirecrackerRuntime { Ok(Box::new(LocalFirecrackerRuntime { cmd, child: None, + vm_id, socket: sock, })) } @@ -843,6 +855,9 @@ impl LocalFirecrackerRuntime { #[async_trait] impl LocalInstanceRuntime for LocalFirecrackerRuntime { + fn id(&self) -> u32 { + self.vm_id + } fn socket(&mut self) -> PathBuf { self.socket.to_path_buf() } @@ -894,6 +909,7 @@ impl LocalInstanceRuntime for LocalFirecrackerRuntime { async fn runtime_instance_from_spec( spec: &LocalUdsInstanceSpec, socket: &PathBuf, + id: u32, ) -> Result> { match spec.runtime_strategy { LocalUdsRuntimeStrategy::LocalProcess => { @@ -902,14 +918,13 @@ async fn runtime_instance_from_spec( LocalUdsRuntimeStrategy::LocalDocker => { LocalDockerRuntime::build(socket, spec.clone()).await } - LocalUdsRuntimeStrategy::LocalFirecracker => LocalFirecrackerRuntime::build().await, + LocalUdsRuntimeStrategy::LocalFirecracker => LocalFirecrackerRuntime::build(id).await, } } async fn setup_firecracker(spec: &LocalUdsInstanceSpec) -> Result<()> { let script_bytes = include_bytes!("firecracker-setup.sh"); let command = Path::new("/firecracker-data/firecracker-setup.sh"); - let pid = Path::new("/firecracker-data/pid"); // we need to ensure the file is in the correct location with the correct permissions std::fs::create_dir_all( @@ -922,20 +937,14 @@ async fn setup_firecracker(spec: &LocalUdsInstanceSpec) -> Result<()> { std::fs::set_permissions(command, std::fs::Permissions::from_mode(0o755)) .map_err(LocalUdsInstanceError::FirecrackerSetupPermissions)?; - // firecracker pid is used to serialize jail creation - std::fs::create_dir_all( - pid.parent() - .expect("This should never happen. Did you remove the path from the string above?"), - ) - .map_err(LocalUdsInstanceError::FirecrackerSetupCreate)?; - std::fs::write(pid, "0").map_err(LocalUdsInstanceError::FirecrackerSetupWrite)?; // Spawn the shell process let _status = Command::new("sudo") .arg(command) .arg("-j") .arg(&spec.pool_size.to_string()) .arg("-rk") - .status(); + .status() + .await; Ok(()) } diff --git a/lib/deadpool-cyclone/src/lib.rs b/lib/deadpool-cyclone/src/lib.rs index 4b587e79f2..d230656da3 100644 --- a/lib/deadpool-cyclone/src/lib.rs +++ b/lib/deadpool-cyclone/src/lib.rs @@ -15,11 +15,15 @@ clippy::module_name_repetitions )] -use async_trait::async_trait; use deadpool::managed::{self, Metrics}; +use tokio::sync::Mutex; + +use async_trait::async_trait; +use pool_noodle::pool_noodle::PoolNoodleError; use thiserror::Error; pub use self::instance::{Instance, Spec}; +pub use crate::pool_noodle::pool_noodle::PoolNoodle; pub use cyclone_client::{ ClientError, CycloneClient, CycloneEncryptionKey, CycloneEncryptionKeyError, ExecutionError, @@ -34,6 +38,8 @@ pub use cyclone_core::{ /// [`Instance`] implementations. pub mod instance; +/// [`PoolNoodle`] implementations. +pub mod pool_noodle; /// Type alias for using [`managed::Pool`] with Cyclone. pub type Pool = managed::Pool>; @@ -67,8 +73,8 @@ pub enum ManagerError { } /// [`Manager`] for creating and recycling generic [`Instance`]s. -#[derive(Debug)] pub struct Manager { + pool_noodle: Mutex, spec: S, } @@ -78,12 +84,19 @@ where { /// Creates a new [`Manager`] from the given instance specification. pub fn new(spec: S) -> Self { - Self { spec } + Self { + pool_noodle: PoolNoodle::new(spec.pool_size().into()).into(), + spec, + } } /// Peforms any necessary setup work to ensure the host can run the pool members. - pub async fn setup(&self) -> Result<(), S::Error> { - self.spec.setup().await + pub async fn setup(&mut self) -> Result<(), S::Error> { + self.spec.setup().await?; + if self.spec.use_pool_noodle() { + self.pool_noodle.lock().await.start(); + } + Ok(()) } } @@ -98,7 +111,19 @@ where type Error = E; async fn create(&self) -> Result { - self.spec.spawn().await + let id = if self.spec.use_pool_noodle() { + self + .pool_noodle + .lock() + .await + .get_ready_jail() + .await + .map_err(|_| PoolNoodleError::ExecutionPoolStarved) + .expect("Function execution is impossible as the execution pool is starved and not recovering.") + } else { + 0 + }; + self.spec.spawn(id).await } async fn recycle( @@ -106,7 +131,19 @@ where obj: &mut Self::Type, _: &Metrics, ) -> managed::RecycleResult { - obj.ensure_healthy().await.map_err(Into::into) + match obj.ensure_healthy().await { + Ok(_) => Ok(()), + Err(err) => { + if self.spec.use_pool_noodle() { + self.pool_noodle + .lock() + .await + .set_as_to_be_cleaned(obj.id()) + .await; + } + Result::map_err(Err(err), Into::into) + } + } } } diff --git a/lib/deadpool-cyclone/src/pool_noodle.rs b/lib/deadpool-cyclone/src/pool_noodle.rs new file mode 100644 index 0000000000..a817fc8909 --- /dev/null +++ b/lib/deadpool-cyclone/src/pool_noodle.rs @@ -0,0 +1,2 @@ +/// [`PoolNoodle`] implementations. +pub mod pool_noodle; diff --git a/lib/deadpool-cyclone/src/pool_noodle/pool_noodle.rs b/lib/deadpool-cyclone/src/pool_noodle/pool_noodle.rs new file mode 100644 index 0000000000..f65be1c256 --- /dev/null +++ b/lib/deadpool-cyclone/src/pool_noodle/pool_noodle.rs @@ -0,0 +1,240 @@ +use std::sync::Arc; +use tokio::process::Command; +use tokio::sync::Mutex; +use tokio::time::Duration; +use tracing::{debug, trace, warn}; + +use std::{collections::VecDeque, result}; +use thiserror::Error; +use tokio::time; + +const MINUMUM_JAIL_PERCENTAGE: f32 = 0.25; + +type Result = result::Result; +///--------------------------------------------------------------------- +///--------------------------------------------------------------------- +///---------------------------:::::::::::::::::::::--------------------- +///---------------------:::::::::::::::::::::-------::::::-------------- +///-----------------:::::::::::::::::::==========------::::::----------- +///----#*+#-----::::::::::::::::::::::::---:::--==++++-:::::::::-------- +///---+#%@@#----::::::::::::::::::===========++++***#*=::::::::::::----- +///--=+*@@@@@*::::::::::::::::::::========++++****###*=::::::::::::::--- +///--=@@@@@@@@%+::::::::::::::::::-=======+++++**###%#+:::::::::::::::-- +///----=@@@@@@@@%=:::::::::::::::::=========++++*###%#+-:::::::::::::::: +///------#@@@@@@@@%-:::::::::::::::-....:===:....:*#%#+-:::::::::::::::: +///-----:::#@@@@@@@@%-::::::::::::..:-=.::+.:=**:-==%#*=:::::::::::::::: +///----::::::%@@@@@@@@%=::::::::::.-#%@@--=.#%@@#*#*%%*=:::::::::::::::: +///---:::::::::@@@@@@@@@%=::::::..:-*@@***#+-+####*%%%#+:::::::::::::::: +///-:::::::::::%@@@@@@@@@@%#:.......+***#%%###**#%%%%%#+:::::::::::::::: +///:::::::::::*#**#@@@@@@@@@%:......=+*#%%%%%@%#%%%%%%#*-::::::::::::::: +///::::::::::#@*+**%@@@@@@@@@=......:++*******####%%%%%*=::::::::::::::: +///:::::::::::=%##%@@@#%@%@@+........++++******###%%%%%*=::::::::::::::: +///:::::::::::=+*#@@%#%%%%@*.........++++++*****###%%%%#+.:::::::::::::: +///::::::::::=++===+*%*===+=.........++++++*****####%%%#**=::::::::::::: +///:::::::::===++++*#@*+++**=........-+++++******###%%%%#%###*-::::::::: +///::::::::-++===*%@@@@%%%##:........++++++******###%%%%%%%###**+::::::: +///:::::::::*%***%@@@@@%%%#:.......=+=+++++******####%%%%#%###****+::::: +///::::::::::*@@@@@@@%%%%%#......-+==+++*********####%%%%+:.=##****+-::: +///:::::::::::::%@@@-%%%%%#*:.:++++++************####%%%%+::::-***+**-:: +///--::::::::::::::::-#%%%###********#-**********####%%%#*.:::::**++**-: +///--::::::::::::::::::*#%%%#######*:..**********####%%%#*.:::::=*++**=: +///---:::::::::::::::::::-#%%###*-.....**********#####%%##::::::=*++**+: +///---::::::::::::::::::::::::::.......+*********#####%%%#-:::::+++**#=: +///---::::::::::::::::::::::::::::.....-********######%%%#+::::*++**##:: +///----:::::::::::::::::::::::::::::::::#*******######%%%#*****++**##=:: +///------::::::::::::::::::::::::::::::.********######%%%%##**+***#%=::: +///---------::::::::::::::::::::::::::::********######%%%%%*****##%-:::: +///---------::::::::::::::::::::::::::::+*******######%%%%%#**##%=:::--- +///----------:::::::::::::::::::::::::::-#*****#######%%%%%-:+#=::::---- +///:------------:::::::::::::::::::::::::##****#######%%%%%+:::::::----- +///=:----------::::::::::::::::::::::::::*#***########%%%%%*::::::------ +/// Pool Noodle is a tool for ensuring that we maintain a bare minimum number of Firecracker Jails +/// for function execution. We wrap it in an Arc Mutex so we can update the queues it manages +/// across threads. +pub struct PoolNoodle(Arc>); + +/// Error type for [`PoolNoodle`]. +#[remain::sorted] +#[derive(Debug, Error)] +pub enum PoolNoodleError { + /// Failed to clean a jail. + #[error("Failed to clean the jail")] + CleanJail, + /// Failed to get a new jail ID. + #[error("Failed to get a new jail from the execution pool!")] + ExecutionPoolStarved, + /// Failed to prepare a new jail. + #[error("Failed to prepare the jail")] + PrepareJail, + /// Failed to set a jail to be cleaned. + #[error("Failed to set a jail to be cleaned.")] + SetClean, +} + +/// Inner struct to excpsulate the queues of jails in different states. +/// +/// pool_size: the total number of jails we want to manage +/// ready: jails that can currently be used to run functions +/// to_be_cleaned: jails that have been used and must be cleaned up +/// unprepared: jails that are available to be prepared and moved into a ready state +pub struct PoolNoodleInner { + pool_size: u32, + ready: Vec, + to_be_cleaned: VecDeque, + unprepared: VecDeque, +} + +impl PoolNoodle { + /// Creates a new instance of PoolNoodle + pub fn new(pool_size: u32) -> Self { + PoolNoodle(Arc::new( + PoolNoodleInner { + pool_size, + ready: Vec::new(), + to_be_cleaned: VecDeque::new(), + unprepared: VecDeque::from_iter(0..pool_size), + } + .into(), + )) + } + + /// Starts the loop responsible for jail lifetimes. The loop works by: + /// 1. Check if we have fewer ready jails than the `[MINUMUM_JAIL_PERCENTAGE]` of `[pool_size]` + /// 2. If so, go get an unprepared jail and prepare it! + /// 3. If not, check if there are any jails that need to be cleaned. + /// 4. If so, clean them and move them to `[unprepared]` so they can be made ready. + /// 5. If not, do nothing! + /// + /// todo(scott): this is a brute force approach. I deally moving this to be event driving and + /// talking over channels will lets us simplify the cross-thread vec fun and the forver-looping + /// future that we just let run rampant. + #[allow(clippy::let_underscore_future)] // This needs to just run in the background forever. + pub fn start(&self) { + let me = Arc::clone(&self.0); + let _ = tokio::spawn(async move { + let mut interval = time::interval(Duration::from_millis(100)); + + loop { + interval.tick().await; + let mut me = me.lock().await; + let ready_len = me.ready.len() as f32; + let to_be_cleaned_len = me.to_be_cleaned.len(); + let unprepared_len = me.unprepared.len(); + let target = me.pool_size as f32 * MINUMUM_JAIL_PERCENTAGE; + + trace!( + "PoolNoodle Stats -- desired ready: {}, ready: {}, to be cleaned: {}, unprepared: {}", + target, + ready_len, + to_be_cleaned_len, + unprepared_len, + ); + + // we're at fewer than 25% of the total, let's make more jails + if ready_len < target && unprepared_len != 0 { + let id = match me.unprepared.pop_back() { + Some(id) => id, + None => { + warn!("PoolNoodle: failed to pop_back() unprepared when it should not be empty!"); + continue; + } + }; + + match PoolNoodle::prepare_jail(id).await { + Ok(_) => { + debug!("PoolNoodle: jail readied: {}", id); + me.ready.push(id); + } + Err(_) => { + warn!("PoolNoodle: failed to ready jail: {}", id); + me.unprepared.push_front(id); + } + } + // let's go clean some dead jails! + } else if to_be_cleaned_len != 0 { + // go get a jail that needs to be cleaned + let id = match me.to_be_cleaned.pop_back() { + Some(id) => id, + None => { + warn!("PoolNoodle: failed to pop_back() to_be_cleaned when it should not be empty!"); + continue; + } + }; + // attempt to clean it + match PoolNoodle::clean_jail(id).await { + // it worked! + Ok(_) => { + debug!("PoolNoodle: jail cleaned: {}", id); + me.unprepared.push_back(id); + } + // it did not work. We should move on to a different jail. This one will be + // abandoned. + Err(_) => { + warn!("PoolNoodle: failed to clean jail: {}", id); + } + }; + } else { + continue; + }; + } + }); + } + + /// This readies a jail. This script is place in the correct location during Veritech startup. + /// todo(scott): This method should be replace with a Rust-native implementation. + async fn prepare_jail(id: u32) -> Result<()> { + let command = String::from("/firecracker-data/prepare_jailer.sh"); + let _status = Command::new("sudo") + .arg(command) + .arg(id.to_string()) + .status() + .await + .map_err(|_| PoolNoodleError::PrepareJail)?; + + Ok(()) + } + + /// This cleans a jail. This script is place in the correct location during Veritech startup. + /// todo(scott): This method should be replace with a Rust-native implementation. This could + /// also be made more efficient by only replacing the rootfs instead of cleaning everything. + async fn clean_jail(id: u32) -> Result<()> { + let command = String::from("/firecracker-data/stop.sh"); + let _status = Command::new("sudo") + .arg(command) + .arg(id.to_string()) + .output() + .await + .map_err(|_| PoolNoodleError::CleanJail)?; + + Ok(()) + } + + /// This pops a ready jail from the stack and returns its Id so it can be executed. + /// If there are no ready jails, it will retry until it either gets one or retries out. + pub async fn get_ready_jail(&mut self) -> Result { + let mut retries = 30; + loop { + trace!("PoolNoodle: getting a ready jail."); + match self.0.lock().await.ready.pop() { + Some(id) => { + debug!("PoolNoodle: got ready jail: {}", id); + break Ok(id); + } + None => { + warn!("PoolNoodle: execution pool starved! Trying again."); + } + } + + if retries < 1 { + return Err(PoolNoodleError::ExecutionPoolStarved); + } + retries -= 1; + time::sleep(Duration::from_millis(1000)).await; + } + } + + /// This marks a jail as needing to be cleaned + pub async fn set_as_to_be_cleaned(&mut self, id: u32) { + self.0.lock().await.to_be_cleaned.push_front(id) + } +} diff --git a/lib/veritech-server/src/server.rs b/lib/veritech-server/src/server.rs index 99c52c9671..f1152f20ea 100644 --- a/lib/veritech-server/src/server.rs +++ b/lib/veritech-server/src/server.rs @@ -107,7 +107,7 @@ impl Server { let (shutdown_broadcast_tx, _) = broadcast::channel(16); let nats = connect_to_nats(&config).await?; - let manager = Manager::new(spec.clone()); + let mut manager = Manager::new(spec.clone()); manager .setup() .await diff --git a/third-party/rust/BUCK b/third-party/rust/BUCK index 5c105c27d2..0faabd5edf 100644 --- a/third-party/rust/BUCK +++ b/third-party/rust/BUCK @@ -34,7 +34,7 @@ git_fetch( git_fetch( name = "tokio-postgres-rustls-855e14009fe3bebe.git", repo = "https://github.com/jbg/tokio-postgres-rustls.git", - rev = "f210aa24c4788e7f3bd9af52c4f7014c5561ac70", + rev = "b8de7acc8067a3ec4b7e99ba6ea654fae8e28fe4", visibility = [], ) @@ -276,18 +276,18 @@ cargo.rust_library( ) http_archive( - name = "anstream-0.6.9.crate", - sha256 = "3fde6067df7359f2d6335ec1a50c1f8f825801687d10da0cc4c6b08e3f6afd15", - strip_prefix = "anstream-0.6.9", - urls = ["https://crates.io/api/v1/crates/anstream/0.6.9/download"], + name = "anstream-0.6.11.crate", + sha256 = "6e2e1ebcb11de5c03c67de28a7df593d32191b44939c482e97702baaaa6ab6a5", + strip_prefix = "anstream-0.6.11", + urls = ["https://crates.io/api/v1/crates/anstream/0.6.11/download"], visibility = [], ) cargo.rust_library( - name = "anstream-0.6.9", - srcs = [":anstream-0.6.9.crate"], + name = "anstream-0.6.11", + srcs = [":anstream-0.6.11.crate"], crate = "anstream", - crate_root = "anstream-0.6.9.crate/src/lib.rs", + crate_root = "anstream-0.6.11.crate/src/lib.rs", edition = "2021", features = [ "auto", @@ -1120,7 +1120,7 @@ cargo.rust_library( visibility = [], deps = [ ":bytes-1.5.0", - ":smallvec-1.12.0", + ":smallvec-1.13.0", ], ) @@ -1925,7 +1925,7 @@ cargo.rust_library( ], visibility = [], deps = [ - ":anstream-0.6.9", + ":anstream-0.6.11", ":anstyle-1.0.4", ":clap_lex-0.6.0", ":strsim-0.10.0", @@ -7639,7 +7639,7 @@ cargo.rust_library( ":num-iter-0.1.43", ":num-traits-0.2.17", ":rand-0.8.5", - ":smallvec-1.12.0", + ":smallvec-1.13.0", ":zeroize-1.7.0", ], ) @@ -8608,7 +8608,7 @@ cargo.rust_library( visibility = [], deps = [ ":cfg-if-1.0.0", - ":smallvec-1.12.0", + ":smallvec-1.13.0", ], ) @@ -12841,22 +12841,22 @@ cargo.rust_library( edition = "2018", features = ["union"], visibility = [], - deps = [":smallvec-1.12.0"], + deps = [":smallvec-1.13.0"], ) http_archive( - name = "smallvec-1.12.0.crate", - sha256 = "2593d31f82ead8df961d8bd23a64c2ccf2eb5dd34b0a34bfb4dd54011c72009e", - strip_prefix = "smallvec-1.12.0", - urls = ["https://crates.io/api/v1/crates/smallvec/1.12.0/download"], + name = "smallvec-1.13.0.crate", + sha256 = "3b187f0231d56fe41bfb12034819dd2bf336422a5866de41bc3fec4b2e3883e8", + strip_prefix = "smallvec-1.13.0", + urls = ["https://crates.io/api/v1/crates/smallvec/1.13.0/download"], visibility = [], ) cargo.rust_library( - name = "smallvec-1.12.0", - srcs = [":smallvec-1.12.0.crate"], + name = "smallvec-1.13.0", + srcs = [":smallvec-1.13.0.crate"], crate = "smallvec", - crate_root = "smallvec-1.12.0.crate/src/lib.rs", + crate_root = "smallvec-1.13.0.crate/src/lib.rs", edition = "2018", features = [ "const_generics", @@ -13138,7 +13138,7 @@ cargo.rust_library( ":serde-1.0.195", ":serde_json-1.0.111", ":sha2-0.10.8", - ":smallvec-1.12.0", + ":smallvec-1.13.0", ":sqlformat-0.2.3", ":thiserror-1.0.56", ":time-0.3.31", @@ -13213,7 +13213,7 @@ cargo.rust_library( ":serde_json-1.0.111", ":sha1-0.10.6", ":sha2-0.10.8", - ":smallvec-1.12.0", + ":smallvec-1.13.0", ":sqlx-core-0.7.3", ":stringprep-0.1.4", ":thiserror-1.0.56", @@ -13879,7 +13879,7 @@ cargo.rust_binary( ":thiserror-1.0.56", ":tokio-1.35.1", ":tokio-postgres-0.7.10", - ":tokio-postgres-rustls-0.11.0", + ":tokio-postgres-rustls-0.11.1", ":tokio-serde-0.8.0", ":tokio-stream-0.1.14", ":tokio-test-0.4.3", @@ -14327,19 +14327,18 @@ cargo.rust_library( alias( name = "tokio-postgres-rustls", - actual = ":tokio-postgres-rustls-0.11.0", + actual = ":tokio-postgres-rustls-0.11.1", visibility = ["PUBLIC"], ) cargo.rust_library( - name = "tokio-postgres-rustls-0.11.0", + name = "tokio-postgres-rustls-0.11.1", srcs = [":tokio-postgres-rustls-855e14009fe3bebe.git"], crate = "tokio_postgres_rustls", crate_root = "tokio-postgres-rustls-855e14009fe3bebe/src/lib.rs", edition = "2018", visibility = [], deps = [ - ":futures-0.3.30", ":ring-0.17.5", ":rustls-0.22.2", ":tokio-1.35.1", @@ -15161,7 +15160,7 @@ cargo.rust_library( ":once_cell-1.19.0", ":opentelemetry-0.21.0", ":opentelemetry_sdk-0.21.2", - ":smallvec-1.12.0", + ":smallvec-1.13.0", ":tracing-0.1.40", ":tracing-core-0.1.32", ":tracing-log-0.2.0", @@ -15214,7 +15213,7 @@ cargo.rust_library( ":once_cell-1.19.0", ":regex-1.10.2", ":sharded-slab-0.1.7", - ":smallvec-1.12.0", + ":smallvec-1.13.0", ":thread_local-1.1.7", ":tracing-0.1.40", ":tracing-core-0.1.32", @@ -16470,7 +16469,7 @@ cargo.rust_library( ":serde-1.0.195", ":serde_json-1.0.111", ":smallstr-0.3.0", - ":smallvec-1.12.0", + ":smallvec-1.13.0", ":thiserror-1.0.56", ], ) diff --git a/third-party/rust/Cargo.lock b/third-party/rust/Cargo.lock index 7b6212c06b..5d0269440a 100644 --- a/third-party/rust/Cargo.lock +++ b/third-party/rust/Cargo.lock @@ -94,9 +94,9 @@ dependencies = [ [[package]] name = "anstream" -version = "0.6.9" +version = "0.6.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3fde6067df7359f2d6335ec1a50c1f8f825801687d10da0cc4c6b08e3f6afd15" +checksum = "6e2e1ebcb11de5c03c67de28a7df593d32191b44939c482e97702baaaa6ab6a5" dependencies = [ "anstyle", "anstyle-parse", @@ -4444,9 +4444,9 @@ dependencies = [ [[package]] name = "smallvec" -version = "1.12.0" +version = "1.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2593d31f82ead8df961d8bd23a64c2ccf2eb5dd34b0a34bfb4dd54011c72009e" +checksum = "3b187f0231d56fe41bfb12034819dd2bf336422a5866de41bc3fec4b2e3883e8" [[package]] name = "socket2" @@ -5202,10 +5202,9 @@ dependencies = [ [[package]] name = "tokio-postgres-rustls" -version = "0.11.0" -source = "git+https://github.com/jbg/tokio-postgres-rustls.git?branch=master#f210aa24c4788e7f3bd9af52c4f7014c5561ac70" +version = "0.11.1" +source = "git+https://github.com/jbg/tokio-postgres-rustls.git?branch=master#b8de7acc8067a3ec4b7e99ba6ea654fae8e28fe4" dependencies = [ - "futures", "ring", "rustls 0.22.2", "tokio",