From 1ce18e43e649a6c49f999c98575043c0561b5e32 Mon Sep 17 00:00:00 2001 From: Piotr Sarna Date: Wed, 3 May 2023 09:48:37 +0200 Subject: [PATCH] stats: add storage_bytes_used (#372) The new metric tracks how many storage bytes are used by this sqld instance. It only tracks the main database file, under the assumption that the most interesting metric for users is "how large is my database after I successfully checkpoint the write-ahead log". Right now we don't have a separate fiber that performs checkpoints, but that's planned. And once we have it, inspecting storage should happen right after the checkpoint. For now, the fiber that monitors storage used just runs once every 15 minutes. Fixes #340 --- sqld/src/heartbeat.rs | 5 +---- sqld/src/http/stats.rs | 22 ++++++++++++++++++---- sqld/src/lib.rs | 15 +++++++++++++++ sqld/src/stats.rs | 10 ++++++++++ 4 files changed, 44 insertions(+), 8 deletions(-) diff --git a/sqld/src/heartbeat.rs b/sqld/src/heartbeat.rs index 64b0198c..c15a8d34 100644 --- a/sqld/src/heartbeat.rs +++ b/sqld/src/heartbeat.rs @@ -13,10 +13,7 @@ pub async fn server_heartbeat( let client = reqwest::Client::new(); loop { sleep(update_period).await; - let body = StatsResponse { - rows_read_count: stats.rows_read(), - rows_written_count: stats.rows_written(), - }; + let body = StatsResponse::from(&stats); let request = client.post(&url); let request = if let Some(ref auth) = auth { request.header("Authorization", auth.clone()) diff --git a/sqld/src/http/stats.rs b/sqld/src/http/stats.rs index ac619cfe..efc0765f 100644 --- a/sqld/src/http/stats.rs +++ b/sqld/src/http/stats.rs @@ -7,13 +7,27 @@ use crate::stats::Stats; pub struct StatsResponse { pub rows_read_count: u64, pub rows_written_count: u64, + pub storage_bytes_used: u64, +} + +impl From<&Stats> for StatsResponse { + fn from(stats: &Stats) -> Self { + Self { + rows_read_count: stats.rows_read(), + rows_written_count: stats.rows_written(), + storage_bytes_used: stats.storage_bytes_used(), + } + } 
+} + +impl From<Stats> for StatsResponse { + fn from(stats: Stats) -> Self { + (&stats).into() + } } pub fn handle_stats(stats: &Stats) -> Response { - let resp = StatsResponse { - rows_read_count: stats.rows_read(), - rows_written_count: stats.rows_written(), - }; + let resp: StatsResponse = stats.into(); let payload = serde_json::to_vec(&resp).unwrap(); Response::builder() diff --git a/sqld/src/lib.rs b/sqld/src/lib.rs index 6d232d6f..ec32405d 100644 --- a/sqld/src/lib.rs +++ b/sqld/src/lib.rs @@ -390,6 +390,19 @@ async fn start_primary( Ok(()) } +// Periodically check the storage used by the database and save it in the Stats structure. +// TODO: Once we have a separate fiber that does WAL checkpoints, running this routine +// right after checkpointing is exactly where it should be done. +async fn run_storage_monitor(mut db_path: PathBuf, stats: Stats) -> anyhow::Result<()> { + let duration = tokio::time::Duration::from_secs(60 * 15); + db_path.push("data"); + loop { + let attr = tokio::fs::metadata(&db_path).await; + stats.set_storage_bytes_used(attr.map_or(0, |stats| stats.len())); + tokio::time::sleep(duration).await; + } +} + pub async fn run_server(config: Config) -> anyhow::Result<()> { tracing::trace!("Backend: {:?}", config.backend); @@ -425,6 +438,8 @@ pub async fn run_server(config: Config) -> anyhow::Result<()> { let stats = Stats::new(&config.db_path)?; + join_set.spawn(run_storage_monitor(config.db_path.clone(), stats.clone())); + match config.writer_rpc_addr { Some(_) => start_replica(&config, &mut join_set, idle_shutdown_layer, stats).await?, None => start_primary(&config, &mut join_set, idle_shutdown_layer, stats).await?, diff --git a/sqld/src/stats.rs b/sqld/src/stats.rs index 2b79a848..2f443d9f 100644 --- a/sqld/src/stats.rs +++ b/sqld/src/stats.rs @@ -16,6 +16,7 @@ pub struct Stats { struct StatsInner { rows_written: AtomicU64, rows_read: AtomicU64, + storage_bytes_used: AtomicU64, } impl Stats { @@ -46,6 +47,10 @@ impl Stats { 
self.inner.rows_read.fetch_add(n, Ordering::Relaxed); } + pub fn set_storage_bytes_used(&self, n: u64) { + self.inner.storage_bytes_used.store(n, Ordering::Relaxed); + } + /// returns the total number of rows read since this database was created pub fn rows_read(&self) -> u64 { self.inner.rows_read.load(Ordering::Relaxed) @@ -55,6 +60,11 @@ impl Stats { pub fn rows_written(&self) -> u64 { self.inner.rows_written.load(Ordering::Relaxed) } + + /// returns the total number of bytes used by the database (excluding uncheckpointed WAL entries) + pub fn storage_bytes_used(&self) -> u64 { + self.inner.storage_bytes_used.load(Ordering::Relaxed) + } } fn spawn_stats_persist_thread(stats: Arc<StatsInner>, mut file: File) {