From 88a3f833def732762a6edf4a17c39682a402df5f Mon Sep 17 00:00:00 2001 From: j-mendez Date: Tue, 10 Dec 2024 13:40:08 -0500 Subject: [PATCH] chore(balance): add simple rebalance --- Cargo.lock | 12 ++++++------ spider/Cargo.toml | 2 +- spider/src/utils/detect_cpu.rs | 24 ++++++++++++++++-------- spider/src/utils/mod.rs | 10 +++++++++- spider_chrome/Cargo.toml | 2 +- spider_cli/Cargo.toml | 2 +- spider_transformations/Cargo.toml | 2 +- spider_utils/Cargo.toml | 2 +- spider_worker/Cargo.toml | 2 +- 9 files changed, 37 insertions(+), 21 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 8b4c18b76..fdc185c1f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4287,7 +4287,7 @@ dependencies = [ [[package]] name = "spider" -version = "2.21.7" +version = "2.21.8" dependencies = [ "ahash", "aho-corasick", @@ -4350,7 +4350,7 @@ dependencies = [ [[package]] name = "spider_chrome" -version = "2.21.7" +version = "2.21.8" dependencies = [ "adblock", "async-tungstenite", @@ -4387,7 +4387,7 @@ dependencies = [ [[package]] name = "spider_cli" -version = "2.21.7" +version = "2.21.8" dependencies = [ "clap", "env_logger", @@ -4412,7 +4412,7 @@ dependencies = [ [[package]] name = "spider_transformations" -version = "2.21.7" +version = "2.21.8" dependencies = [ "aho-corasick", "fast_html2md", @@ -4434,7 +4434,7 @@ dependencies = [ [[package]] name = "spider_utils" -version = "2.21.7" +version = "2.21.8" dependencies = [ "indexmap 1.9.3", "serde", @@ -4446,7 +4446,7 @@ dependencies = [ [[package]] name = "spider_worker" -version = "2.21.7" +version = "2.21.8" dependencies = [ "env_logger", "lazy_static", diff --git a/spider/Cargo.toml b/spider/Cargo.toml index 42b57fd5a..9fd16583a 100644 --- a/spider/Cargo.toml +++ b/spider/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "spider" -version = "2.21.7" +version = "2.21.8" authors = [ "j-mendez " ] diff --git a/spider/src/utils/detect_cpu.rs b/spider/src/utils/detect_cpu.rs index ae533b511..ff2340b1b 100644 --- a/spider/src/utils/detect_cpu.rs 
+++ b/spider/src/utils/detect_cpu.rs
@@ -1,20 +1,21 @@
-use std::sync::atomic::{AtomicUsize, Ordering};
+use std::sync::atomic::{AtomicI8, Ordering};
 use sysinfo::System;
 use tokio::sync::OnceCell;
 use tokio::time::sleep;
 
-/// Atomic value to store CPU usage.
-static CPU_USAGE: AtomicUsize = AtomicUsize::new(0);
+/// The CPU state for the crawl.
+/// `0`: average usage below 70%, `1`: at least 70%, `2`: at least 95%.
+static CPU_STATE: AtomicI8 = AtomicI8::new(0);
 
 /// `OnceCell` CPU tracking.
 static INIT: OnceCell<()> = OnceCell::const_new();
 
 /// Get the total avg CPU being used.
-fn get_cpu_usage(sys: &System) -> usize {
+fn get_cpu_usage(sys: &System) -> f32 {
     sys.cpus()
         .iter()
         .map(|cpu| cpu.cpu_usage() / sys.cpus().len() as f32)
-        .sum::<f32>() as usize
+        .sum::<f32>()
 }
 
 /// Update the cpu usage being used.
@@ -25,7 +26,14 @@ async fn update_cpu_usage() {
         loop {
             sys.refresh_cpu_usage();
             let usage = get_cpu_usage(&sys);
-            CPU_USAGE.store(usage, Ordering::Relaxed);
+            let state = if usage >= 95.0 {
+                2
+            } else if usage >= 70.0 {
+                1
+            } else {
+                0
+            };
+            CPU_STATE.store(state, Ordering::Relaxed);
             sleep(sysinfo::MINIMUM_CPU_UPDATE_INTERVAL).await;
         }
     }
@@ -40,7 +48,7 @@ async fn init_once() {
 }
 
 /// Get the cpu usage being used utility.
-pub async fn get_global_cpu_usage() -> usize {
+pub async fn get_global_cpu_usage() -> i8 {
     init_once().await;
-    CPU_USAGE.load(Ordering::Relaxed)
+    CPU_STATE.load(Ordering::Relaxed)
 }
diff --git a/spider/src/utils/mod.rs b/spider/src/utils/mod.rs
index 875bccea1..051452b31 100644
--- a/spider/src/utils/mod.rs
+++ b/spider/src/utils/mod.rs
@@ -2998,6 +2998,10 @@ where
     set.spawn(future)
 }
 
+#[cfg(feature = "balance")]
+/// Period to wait to rebalance cpu in means of IO being main impact.
+const REBALANCE_TIME: std::time::Duration = std::time::Duration::from_millis(100);
+
 /// Return the semaphore that should be used.
#[cfg(feature = "balance")] pub async fn get_semaphore(semaphore: &Arc, detect: bool) -> &Arc { @@ -3007,7 +3011,11 @@ pub async fn get_semaphore(semaphore: &Arc, detect: bool) -> &Arc= 70 { + if cpu_load == 2 { + tokio::time::sleep(REBALANCE_TIME).await; + } + + if cpu_load >= 1 { &*crate::website::SEM_SHARED } else { semaphore diff --git a/spider_chrome/Cargo.toml b/spider_chrome/Cargo.toml index 212e59c93..ed7151c0b 100644 --- a/spider_chrome/Cargo.toml +++ b/spider_chrome/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "spider_chrome" -version = "2.21.7" +version = "2.21.8" rust-version = "1.70" authors = [ "j-mendez " diff --git a/spider_cli/Cargo.toml b/spider_cli/Cargo.toml index bc5443bad..84ad18d7e 100644 --- a/spider_cli/Cargo.toml +++ b/spider_cli/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "spider_cli" -version = "2.21.7" +version = "2.21.8" authors = [ "j-mendez " ] diff --git a/spider_transformations/Cargo.toml b/spider_transformations/Cargo.toml index 26242e02a..827869c4c 100644 --- a/spider_transformations/Cargo.toml +++ b/spider_transformations/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "spider_transformations" -version = "2.21.7" +version = "2.21.8" authors = [ "j-mendez " ] diff --git a/spider_utils/Cargo.toml b/spider_utils/Cargo.toml index 171389406..6c450d02c 100644 --- a/spider_utils/Cargo.toml +++ b/spider_utils/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "spider_utils" -version = "2.21.7" +version = "2.21.8" authors = [ "j-mendez " ] diff --git a/spider_worker/Cargo.toml b/spider_worker/Cargo.toml index e6bf439fc..40e19467f 100644 --- a/spider_worker/Cargo.toml +++ b/spider_worker/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "spider_worker" -version = "2.21.7" +version = "2.21.8" authors = [ "j-mendez " ]