From cbcd4058edb7a2c2bb3bfe1a6fc1ffb0d820b870 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arpad=20M=C3=BCller?= Date: Fri, 6 Sep 2024 14:33:52 +0200 Subject: [PATCH] Fix 1.82 clippy lint too_long_first_doc_paragraph (#8941) Addresses the 1.82 beta clippy lint `too_long_first_doc_paragraph` by adding newlines to the first sentence if it is short enough, and making a short first sentence if there is the need. --- compute_tools/src/pg_helpers.rs | 7 ++++--- libs/metrics/src/lib.rs | 1 + libs/pageserver_api/src/controller_api.rs | 2 ++ libs/pageserver_api/src/models.rs | 10 +++++++--- libs/postgres_backend/src/lib.rs | 6 ++++-- libs/postgres_connection/src/lib.rs | 1 + libs/remote_storage/src/lib.rs | 6 +++++- libs/tenant_size_model/src/lib.rs | 7 ++++--- libs/utils/src/circuit_breaker.rs | 6 ++++-- libs/utils/src/id.rs | 6 ++++-- libs/utils/src/lock_file.rs | 4 +++- libs/utils/src/pageserver_feedback.rs | 1 + libs/utils/src/poison.rs | 2 ++ libs/utils/src/shard.rs | 9 +++++---- libs/utils/src/simple_rcu.rs | 7 +++---- libs/utils/src/sync/heavier_once_cell.rs | 4 +++- libs/utils/src/vec_map.rs | 1 + libs/utils/src/yielding_loop.rs | 7 ++++--- pageserver/src/config.rs | 2 ++ pageserver/src/context.rs | 10 ++++++---- pageserver/src/pgdatadir_mapping.rs | 8 +++++--- pageserver/src/tenant.rs | 9 +++++---- pageserver/src/tenant/metadata.rs | 9 +++++---- pageserver/src/tenant/mgr.rs | 12 +++++++----- pageserver/src/tenant/remote_timeline_client.rs | 2 ++ .../src/tenant/remote_timeline_client/index.rs | 1 + pageserver/src/tenant/storage_layer.rs | 9 +++++---- pageserver/src/tenant/storage_layer/delta_layer.rs | 9 +++++---- pageserver/src/tenant/storage_layer/image_layer.rs | 8 +++++--- pageserver/src/tenant/storage_layer/layer_desc.rs | 6 ++++-- pageserver/src/tenant/storage_layer/layer_name.rs | 5 +++-- .../src/tenant/storage_layer/merge_iterator.rs | 8 +++++--- .../src/tenant/storage_layer/split_writer.rs | 14 ++++++++------ pageserver/src/tenant/vectored_blob_io.rs | 6 ++++-- pageserver/src/virtual_file.rs | 5 +++-- pageserver/src/walredo.rs | 11 +++++------ proxy/src/stream.rs | 1 + safekeeper/src/pull_timeline.rs | 1 + safekeeper/src/receive_wal.rs | 6 ++++-- safekeeper/src/state.rs | 8 +++++--- safekeeper/src/timeline.rs | 1 + safekeeper/src/timeline_eviction.rs | 8 +++++--- safekeeper/src/timeline_guard.rs | 4 +++- safekeeper/src/timeline_manager.rs | 1 + safekeeper/src/timelines_set.rs | 3 ++- safekeeper/src/wal_backup_partial.rs | 6 ++++-- safekeeper/src/wal_service.rs | 1 + storage_controller/src/service.rs | 4 +++- storage_scrubber/src/garbage.rs | 7 ++++--- storage_scrubber/src/metadata_stream.rs | 4 +++- storage_scrubber/src/pageserver_physical_gc.rs | 7 ++++--- 51 files changed, 180 insertions(+), 103 deletions(-) diff --git a/compute_tools/src/pg_helpers.rs b/compute_tools/src/pg_helpers.rs index 863fa9468ff4..b2dc2658646c 100644 --- a/compute_tools/src/pg_helpers.rs +++ b/compute_tools/src/pg_helpers.rs @@ -22,9 +22,10 @@ use compute_api::spec::{Database, GenericOption, GenericOptions, PgIdent, Role}; const POSTGRES_WAIT_TIMEOUT: Duration = Duration::from_millis(60 * 1000); // milliseconds -/// Escape a string for including it in a SQL literal. Wrapping the result -/// with `E'{}'` or `'{}'` is not required, as it returns a ready-to-use -/// SQL string literal, e.g. `'db'''` or `E'db\\'`. +/// Escape a string for including it in a SQL literal. +/// +/// Wrapping the result with `E'{}'` or `'{}'` is not required, +/// as it returns a ready-to-use SQL string literal, e.g. `'db'''` or `E'db\\'`. /// See /// for the original implementation. pub fn escape_literal(s: &str) -> String { diff --git a/libs/metrics/src/lib.rs b/libs/metrics/src/lib.rs index df000cd0fb28..cd4526c0894a 100644 --- a/libs/metrics/src/lib.rs +++ b/libs/metrics/src/lib.rs @@ -68,6 +68,7 @@ macro_rules! register_uint_gauge { static INTERNAL_REGISTRY: Lazy = Lazy::new(Registry::new); /// Register a collector in the internal registry. MUST be called before the first call to `gather()`. +/// /// Otherwise, we can have a deadlock in the `gather()` call, trying to register a new collector /// while holding the lock. pub fn register_internal(c: Box) -> prometheus::Result<()> { diff --git a/libs/pageserver_api/src/controller_api.rs b/libs/pageserver_api/src/controller_api.rs index 94104af002e2..5c8dcbf57184 100644 --- a/libs/pageserver_api/src/controller_api.rs +++ b/libs/pageserver_api/src/controller_api.rs @@ -147,6 +147,8 @@ pub struct TenantDescribeResponseShard { pub preferred_az_id: Option, } +/// Migration request for a given tenant shard to a given node. +/// /// Explicitly migrating a particular shard is a low level operation /// TODO: higher level "Reschedule tenant" operation where the request /// specifies some constraints, e.g. asking it to get off particular node(s) diff --git a/libs/pageserver_api/src/models.rs b/libs/pageserver_api/src/models.rs index d13d04eb1b11..ffe79c8350da 100644 --- a/libs/pageserver_api/src/models.rs +++ b/libs/pageserver_api/src/models.rs @@ -305,8 +305,10 @@ pub struct TenantConfig { pub lsn_lease_length_for_ts: Option, } -/// The policy for the aux file storage. It can be switched through `switch_aux_file_policy` -/// tenant config. When the first aux file written, the policy will be persisted in the +/// The policy for the aux file storage. +/// +/// It can be switched through `switch_aux_file_policy` tenant config. +/// When the first aux file written, the policy will be persisted in the /// `index_part.json` file and has a limited migration path. /// /// Currently, we only allow the following migration path: @@ -896,7 +898,9 @@ pub struct WalRedoManagerStatus { pub process: Option, } -/// The progress of a secondary tenant is mostly useful when doing a long running download: e.g. initiating +/// The progress of a secondary tenant. +/// +/// It is mostly useful when doing a long running download: e.g. initiating /// a download job, timing out while waiting for it to run, and then inspecting this status to understand /// what's happening. #[derive(Default, Debug, Serialize, Deserialize, Clone)] diff --git a/libs/postgres_backend/src/lib.rs b/libs/postgres_backend/src/lib.rs index 7c7c6535b338..600f1d728c3b 100644 --- a/libs/postgres_backend/src/lib.rs +++ b/libs/postgres_backend/src/lib.rs @@ -69,8 +69,10 @@ impl QueryError { } /// Returns true if the given error is a normal consequence of a network issue, -/// or the client closing the connection. These errors can happen during normal -/// operations, and don't indicate a bug in our code. +/// or the client closing the connection. +/// +/// These errors can happen during normal operations, +/// and don't indicate a bug in our code. pub fn is_expected_io_error(e: &io::Error) -> bool { use io::ErrorKind::*; matches!( diff --git a/libs/postgres_connection/src/lib.rs b/libs/postgres_connection/src/lib.rs index 9f57f3d50750..ddf9f7b6109c 100644 --- a/libs/postgres_connection/src/lib.rs +++ b/libs/postgres_connection/src/lib.rs @@ -7,6 +7,7 @@ use std::fmt; use url::Host; /// Parses a string of format either `host:port` or `host` into a corresponding pair. +/// /// The `host` part should be a correct `url::Host`, while `port` (if present) should be /// a valid decimal u16 of digits only. pub fn parse_host_port>(host_port: S) -> Result<(Host, Option), anyhow::Error> { diff --git a/libs/remote_storage/src/lib.rs b/libs/remote_storage/src/lib.rs index cc1d3e0ae4d5..b5b69c9fafed 100644 --- a/libs/remote_storage/src/lib.rs +++ b/libs/remote_storage/src/lib.rs @@ -45,6 +45,8 @@ pub use azure_core::Etag; pub use error::{DownloadError, TimeTravelError, TimeoutOrCancel}; +/// Default concurrency limit for S3 operations +/// /// Currently, sync happens with AWS S3, that has two limits on requests per second: /// ~200 RPS for IAM services /// @@ -300,7 +302,9 @@ pub trait RemoteStorage: Send + Sync + 'static { ) -> Result<(), TimeTravelError>; } -/// DownloadStream is sensitive to the timeout and cancellation used with the original +/// Data part of an ongoing [`Download`]. +/// +/// `DownloadStream` is sensitive to the timeout and cancellation used with the original /// [`RemoteStorage::download`] request. The type yields `std::io::Result` to be compatible /// with `tokio::io::copy_buf`. // This has 'static because safekeepers do not use cancellation tokens (yet) diff --git a/libs/tenant_size_model/src/lib.rs b/libs/tenant_size_model/src/lib.rs index a3e12cf0e3bf..974a49840432 100644 --- a/libs/tenant_size_model/src/lib.rs +++ b/libs/tenant_size_model/src/lib.rs @@ -5,9 +5,10 @@ mod calculation; pub mod svg; -/// StorageModel is the input to the synthetic size calculation. It represents -/// a tree of timelines, with just the information that's needed for the -/// calculation. This doesn't track timeline names or where each timeline +/// StorageModel is the input to the synthetic size calculation. +/// +/// It represents a tree of timelines, with just the information that's needed +/// for the calculation. This doesn't track timeline names or where each timeline /// begins and ends, for example. Instead, it consists of "points of interest" /// on the timelines. A point of interest could be the timeline start or end point, /// the oldest point on a timeline that needs to be retained because of PITR diff --git a/libs/utils/src/circuit_breaker.rs b/libs/utils/src/circuit_breaker.rs index 720ea39d4f77..e1ddfd865009 100644 --- a/libs/utils/src/circuit_breaker.rs +++ b/libs/utils/src/circuit_breaker.rs @@ -5,8 +5,10 @@ use std::{ use metrics::IntCounter; -/// Circuit breakers are for operations that are expensive and fallible: if they fail repeatedly, -/// we will stop attempting them for some period of time, to avoid denial-of-service from retries, and +/// Circuit breakers are for operations that are expensive and fallible. +/// +/// If a circuit breaker fails repeatedly, we will stop attempting it for some +/// period of time, to avoid denial-of-service from retries, and /// to mitigate the log spam from repeated failures. pub struct CircuitBreaker { /// An identifier that enables us to log useful errors when a circuit is broken diff --git a/libs/utils/src/id.rs b/libs/utils/src/id.rs index db468e30548b..2cda899b159b 100644 --- a/libs/utils/src/id.rs +++ b/libs/utils/src/id.rs @@ -249,8 +249,10 @@ macro_rules! id_newtype { }; } -/// Neon timeline IDs are different from PostgreSQL timeline -/// IDs. They serve a similar purpose though: they differentiate +/// Neon timeline ID. +/// +/// They are different from PostgreSQL timeline +/// IDs, but serve a similar purpose: they differentiate /// between different "histories" of the same cluster. However, /// PostgreSQL timeline IDs are a bit cumbersome, because they are only /// 32-bits wide, and they must be in ascending order in any given diff --git a/libs/utils/src/lock_file.rs b/libs/utils/src/lock_file.rs index 59c66ca7579e..3a2ed3e830c6 100644 --- a/libs/utils/src/lock_file.rs +++ b/libs/utils/src/lock_file.rs @@ -100,7 +100,9 @@ pub enum LockFileRead { } /// Open & try to lock the lock file at the given `path`, returning a [handle][`LockFileRead`] to -/// inspect its content. It is not an `Err(...)` if the file does not exist or is already locked. +/// inspect its content. +/// +/// It is not an `Err(...)` if the file does not exist or is already locked. /// Check the [`LockFileRead`] variants for details. pub fn read_and_hold_lock_file(path: &Utf8Path) -> anyhow::Result { let res = fs::OpenOptions::new().read(true).open(path); diff --git a/libs/utils/src/pageserver_feedback.rs b/libs/utils/src/pageserver_feedback.rs index 3ddfa44f41e5..dede65e6997a 100644 --- a/libs/utils/src/pageserver_feedback.rs +++ b/libs/utils/src/pageserver_feedback.rs @@ -8,6 +8,7 @@ use tracing::{trace, warn}; use crate::lsn::Lsn; /// Feedback pageserver sends to safekeeper and safekeeper resends to compute. +/// /// Serialized in custom flexible key/value format. In replication protocol, it /// is marked with NEON_STATUS_UPDATE_TAG_BYTE to differentiate from postgres /// Standby status update / Hot standby feedback messages. diff --git a/libs/utils/src/poison.rs b/libs/utils/src/poison.rs index 27378c69fc1c..c3e2fba20c3c 100644 --- a/libs/utils/src/poison.rs +++ b/libs/utils/src/poison.rs @@ -65,6 +65,8 @@ impl Poison { } } +/// Armed pointer to a [`Poison`]. +/// /// Use [`Self::data`] and [`Self::data_mut`] to access the wrapped state. /// Once modifications are done, use [`Self::disarm`]. /// If [`Guard`] gets dropped instead of calling [`Self::disarm`], the state is poisoned diff --git a/libs/utils/src/shard.rs b/libs/utils/src/shard.rs index f6b430657e10..d146010b417a 100644 --- a/libs/utils/src/shard.rs +++ b/libs/utils/src/shard.rs @@ -13,10 +13,11 @@ pub struct ShardNumber(pub u8); #[derive(Ord, PartialOrd, Eq, PartialEq, Clone, Copy, Serialize, Deserialize, Debug, Hash)] pub struct ShardCount(pub u8); -/// Combination of ShardNumber and ShardCount. For use within the context of a particular tenant, -/// when we need to know which shard we're dealing with, but do not need to know the full -/// ShardIdentity (because we won't be doing any page->shard mapping), and do not need to know -/// the fully qualified TenantShardId. +/// Combination of ShardNumber and ShardCount. +/// +/// For use within the context of a particular tenant, when we need to know which shard we're +/// dealing with, but do not need to know the full ShardIdentity (because we won't be doing +/// any page->shard mapping), and do not need to know the fully qualified TenantShardId. #[derive(Eq, PartialEq, PartialOrd, Ord, Clone, Copy, Hash)] pub struct ShardIndex { pub shard_number: ShardNumber, diff --git a/libs/utils/src/simple_rcu.rs b/libs/utils/src/simple_rcu.rs index ecc5353be323..01750b2aef12 100644 --- a/libs/utils/src/simple_rcu.rs +++ b/libs/utils/src/simple_rcu.rs @@ -49,12 +49,11 @@ use std::sync::{RwLock, RwLockWriteGuard}; use tokio::sync::watch; -/// /// Rcu allows multiple readers to read and hold onto a value without blocking -/// (for very long). Storing to the Rcu updates the value, making new readers -/// immediately see the new value, but it also waits for all current readers to -/// finish. +/// (for very long). /// +/// Storing to the Rcu updates the value, making new readers immediately see +/// the new value, but it also waits for all current readers to finish. pub struct Rcu { inner: RwLock>, } diff --git a/libs/utils/src/sync/heavier_once_cell.rs b/libs/utils/src/sync/heavier_once_cell.rs index 1abd3d986194..dc711fb0284e 100644 --- a/libs/utils/src/sync/heavier_once_cell.rs +++ b/libs/utils/src/sync/heavier_once_cell.rs @@ -5,7 +5,9 @@ use std::sync::{ use tokio::sync::Semaphore; /// Custom design like [`tokio::sync::OnceCell`] but using [`OwnedSemaphorePermit`] instead of -/// `SemaphorePermit`, allowing use of `take` which does not require holding an outer mutex guard +/// `SemaphorePermit`. +/// +/// Allows use of `take` which does not require holding an outer mutex guard /// for the duration of initialization. /// /// Has no unsafe, builds upon [`tokio::sync::Semaphore`] and [`std::sync::Mutex`]. diff --git a/libs/utils/src/vec_map.rs b/libs/utils/src/vec_map.rs index 18b2af14f130..5f0028bacd49 100644 --- a/libs/utils/src/vec_map.rs +++ b/libs/utils/src/vec_map.rs @@ -7,6 +7,7 @@ pub enum VecMapOrdering { } /// Ordered map datastructure implemented in a Vec. +/// /// Append only - can only add keys that are larger than the /// current max key. /// Ordering can be adjusted using [`VecMapOrdering`] diff --git a/libs/utils/src/yielding_loop.rs b/libs/utils/src/yielding_loop.rs index 41c4cee45d2d..68274f063122 100644 --- a/libs/utils/src/yielding_loop.rs +++ b/libs/utils/src/yielding_loop.rs @@ -6,9 +6,10 @@ pub enum YieldingLoopError { Cancelled, } -/// Helper for long synchronous loops, e.g. over all tenants in the system. Periodically -/// yields to avoid blocking the executor, and after resuming checks the provided -/// cancellation token to drop out promptly on shutdown. +/// Helper for long synchronous loops, e.g. over all tenants in the system. +/// +/// Periodically yields to avoid blocking the executor, and after resuming +/// checks the provided cancellation token to drop out promptly on shutdown. #[inline(always)] pub async fn yielding_loop( interval: usize, diff --git a/pageserver/src/config.rs b/pageserver/src/config.rs index 4e68e276d35c..29a98855d3fe 100644 --- a/pageserver/src/config.rs +++ b/pageserver/src/config.rs @@ -180,6 +180,8 @@ pub struct PageServerConf { pub io_buffer_alignment: usize, } +/// Token for authentication to safekeepers +/// /// We do not want to store this in a PageServerConf because the latter may be logged /// and/or serialized at a whim, while the token is secret. Currently this token is the /// same for accessing all tenants/timelines, but may become per-tenant/per-timeline in diff --git a/pageserver/src/context.rs b/pageserver/src/context.rs index 012cb8d96f2f..7afcf52cf29e 100644 --- a/pageserver/src/context.rs +++ b/pageserver/src/context.rs @@ -1,7 +1,9 @@ -//! This module defines `RequestContext`, a structure that we use throughout -//! the pageserver to propagate high-level context from places -//! that _originate_ activity down to the shared code paths at the -//! heart of the pageserver. It's inspired by Golang's `context.Context`. +//! Defines [`RequestContext`]. +//! +//! It is a structure that we use throughout the pageserver to propagate +//! high-level context from places that _originate_ activity down to the +//! shared code paths at the heart of the pageserver. It's inspired by +//! Golang's `context.Context`. //! //! For example, in `Timeline::get(page_nr, lsn)` we need to answer the following questions: //! 1. What high-level activity ([`TaskKind`]) needs this page? diff --git a/pageserver/src/pgdatadir_mapping.rs b/pageserver/src/pgdatadir_mapping.rs index d28a2142656a..808d4b666e07 100644 --- a/pageserver/src/pgdatadir_mapping.rs +++ b/pageserver/src/pgdatadir_mapping.rs @@ -1021,9 +1021,10 @@ impl Timeline { } /// DatadirModification represents an operation to ingest an atomic set of -/// updates to the repository. It is created by the 'begin_record' -/// function. It is called for each WAL record, so that all the modifications -/// by a one WAL record appear atomic. +/// updates to the repository. +/// +/// It is created by the 'begin_record' function. It is called for each WAL +/// record, so that all the modifications by a one WAL record appear atomic. pub struct DatadirModification<'a> { /// The timeline this modification applies to. You can access this to /// read the state, but note that any pending updates are *not* reflected @@ -2048,6 +2049,7 @@ impl<'a> DatadirModification<'a> { /// This struct facilitates accessing either a committed key from the timeline at a /// specific LSN, or the latest uncommitted key from a pending modification. +/// /// During WAL ingestion, the records from multiple LSNs may be batched in the same /// modification before being flushed to the timeline. Hence, the routines in WalIngest /// need to look up the keys in the modification first before looking them up in the diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs index fb30857ddf6b..fd2520a42eb3 100644 --- a/pageserver/src/tenant.rs +++ b/pageserver/src/tenant.rs @@ -1,8 +1,9 @@ +//! Timeline repository implementation that keeps old data in layer files, and +//! the recent changes in ephemeral files. //! -//! Timeline repository implementation that keeps old data in files on disk, and -//! the recent changes in memory. See tenant/*_layer.rs files. -//! The functions here are responsible for locating the correct layer for the -//! get/put call, walking back the timeline branching history as needed. +//! See tenant/*_layer.rs files. The functions here are responsible for locating +//! the correct layer for the get/put call, walking back the timeline branching +//! history as needed. //! //! The files are stored in the .neon/tenants//timelines/ //! directory. See docs/pageserver-storage.md for how the files are managed. diff --git a/pageserver/src/tenant/metadata.rs b/pageserver/src/tenant/metadata.rs index 190316df426d..24440d4b3559 100644 --- a/pageserver/src/tenant/metadata.rs +++ b/pageserver/src/tenant/metadata.rs @@ -1,7 +1,8 @@ -//! Describes the legacy now hopefully no longer modified per-timeline metadata stored in -//! `index_part.json` managed by [`remote_timeline_client`]. For many tenants and their timelines, -//! this struct and it's original serialization format is still needed because they were written a -//! long time ago. +//! Describes the legacy now hopefully no longer modified per-timeline metadata. +//! +//! It is stored in `index_part.json` managed by [`remote_timeline_client`]. For many tenants and +//! their timelines, this struct and its original serialization format is still needed because +//! they were written a long time ago. //! //! Instead of changing and adding versioning to this, just change [`IndexPart`] with soft json //! versioning. diff --git a/pageserver/src/tenant/mgr.rs b/pageserver/src/tenant/mgr.rs index 4e6ea0c8f974..2104f415319e 100644 --- a/pageserver/src/tenant/mgr.rs +++ b/pageserver/src/tenant/mgr.rs @@ -282,9 +282,10 @@ impl BackgroundPurges { static TENANTS: Lazy> = Lazy::new(|| std::sync::RwLock::new(TenantsMap::Initializing)); -/// The TenantManager is responsible for storing and mutating the collection of all tenants -/// that this pageserver process has state for. Every Tenant and SecondaryTenant instance -/// lives inside the TenantManager. +/// Responsible for storing and mutating the collection of all tenants +/// that this pageserver has state for. +/// +/// Every Tenant and SecondaryTenant instance lives inside the TenantManager. /// /// The most important role of the TenantManager is to prevent conflicts: e.g. trying to attach /// the same tenant twice concurrently, or trying to configure the same tenant into secondary @@ -2346,8 +2347,9 @@ pub enum TenantMapError { ShuttingDown, } -/// Guards a particular tenant_id's content in the TenantsMap. While this -/// structure exists, the TenantsMap will contain a [`TenantSlot::InProgress`] +/// Guards a particular tenant_id's content in the TenantsMap. +/// +/// While this structure exists, the TenantsMap will contain a [`TenantSlot::InProgress`] /// for this tenant, which acts as a marker for any operations targeting /// this tenant to retry later, or wait for the InProgress state to end. /// diff --git a/pageserver/src/tenant/remote_timeline_client.rs b/pageserver/src/tenant/remote_timeline_client.rs index 71b766e4c76f..1f9ae40af534 100644 --- a/pageserver/src/tenant/remote_timeline_client.rs +++ b/pageserver/src/tenant/remote_timeline_client.rs @@ -2184,6 +2184,8 @@ pub fn remote_timeline_path( remote_timelines_path(tenant_shard_id).join(Utf8Path::new(&timeline_id.to_string())) } +/// Obtains the path of the given Layer in the remote +/// /// Note that the shard component of a remote layer path is _not_ always the same /// as in the TenantShardId of the caller: tenants may reference layers from a different /// ShardIndex. Use the ShardIndex from the layer's metadata. diff --git a/pageserver/src/tenant/remote_timeline_client/index.rs b/pageserver/src/tenant/remote_timeline_client/index.rs index 757fb9d03242..c51ff549195a 100644 --- a/pageserver/src/tenant/remote_timeline_client/index.rs +++ b/pageserver/src/tenant/remote_timeline_client/index.rs @@ -1,4 +1,5 @@ //! In-memory index to track the tenant files on the remote storage. +//! //! Able to restore itself from the storage index parts, that are located in every timeline's remote directory and contain all data about //! remote timeline layers and its metadata. diff --git a/pageserver/src/tenant/storage_layer.rs b/pageserver/src/tenant/storage_layer.rs index a1202ad507d0..dac6b2f89386 100644 --- a/pageserver/src/tenant/storage_layer.rs +++ b/pageserver/src/tenant/storage_layer.rs @@ -434,10 +434,11 @@ impl ReadableLayer { } } -/// Layers contain a hint indicating whether they are likely to be used for reads. This is a hint rather -/// than an authoritative value, so that we do not have to update it synchronously when changing the visibility -/// of layers (for example when creating a branch that makes some previously covered layers visible). It should -/// be used for cache management but not for correctness-critical checks. +/// Layers contain a hint indicating whether they are likely to be used for reads. +/// +/// This is a hint rather than an authoritative value, so that we do not have to update it synchronously +/// when changing the visibility of layers (for example when creating a branch that makes some previously +/// covered layers visible). It should be used for cache management but not for correctness-critical checks. #[derive(Debug, Clone, PartialEq, Eq)] pub enum LayerVisibilityHint { /// A Visible layer might be read while serving a read, because there is not an image layer between it diff --git a/pageserver/src/tenant/storage_layer/delta_layer.rs b/pageserver/src/tenant/storage_layer/delta_layer.rs index 6a2cd942320e..34f1b15138ec 100644 --- a/pageserver/src/tenant/storage_layer/delta_layer.rs +++ b/pageserver/src/tenant/storage_layer/delta_layer.rs @@ -136,10 +136,11 @@ impl Summary { // Flag indicating that this version initialize the page const WILL_INIT: u64 = 1; -/// Struct representing reference to BLOB in layers. Reference contains BLOB -/// offset, and for WAL records it also contains `will_init` flag. The flag -/// helps to determine the range of records that needs to be applied, without -/// reading/deserializing records themselves. +/// Struct representing reference to BLOB in layers. +/// +/// Reference contains BLOB offset, and for WAL records it also contains +/// `will_init` flag. The flag helps to determine the range of records +/// that needs to be applied, without reading/deserializing records themselves. #[derive(Debug, Serialize, Deserialize, Copy, Clone)] pub struct BlobRef(pub u64); diff --git a/pageserver/src/tenant/storage_layer/image_layer.rs b/pageserver/src/tenant/storage_layer/image_layer.rs index 77ce1ae670fc..875e223c9c8f 100644 --- a/pageserver/src/tenant/storage_layer/image_layer.rs +++ b/pageserver/src/tenant/storage_layer/image_layer.rs @@ -1,7 +1,9 @@ //! An ImageLayer represents an image or a snapshot of a key-range at -//! one particular LSN. It contains an image of all key-value pairs -//! in its key-range. Any key that falls into the image layer's range -//! but does not exist in the layer, does not exist. +//! one particular LSN. +//! +//! It contains an image of all key-value pairs in its key-range. Any key +//! that falls into the image layer's range but does not exist in the layer, +//! does not exist. //! //! An image layer is stored in a file on disk. The file is stored in //! timelines/ directory. Currently, there are no diff --git a/pageserver/src/tenant/storage_layer/layer_desc.rs b/pageserver/src/tenant/storage_layer/layer_desc.rs index cbd18e650f62..e90ff3c4b287 100644 --- a/pageserver/src/tenant/storage_layer/layer_desc.rs +++ b/pageserver/src/tenant/storage_layer/layer_desc.rs @@ -12,8 +12,10 @@ use serde::{Deserialize, Serialize}; #[cfg(test)] use utils::id::TenantId; -/// A unique identifier of a persistent layer. This is different from `LayerDescriptor`, which is only used in the -/// benchmarks. This struct contains all necessary information to find the image / delta layer. It also provides +/// A unique identifier of a persistent layer. +/// +/// This is different from `LayerDescriptor`, which is only used in the benchmarks. +/// This struct contains all necessary information to find the image / delta layer. It also provides /// a unified way to generate layer information like file name. #[derive(Debug, PartialEq, Eq, Clone, Serialize, Deserialize, Hash)] pub struct PersistentLayerDesc { diff --git a/pageserver/src/tenant/storage_layer/layer_name.rs b/pageserver/src/tenant/storage_layer/layer_name.rs index 47ae556279da..ffe7ca5f3e40 100644 --- a/pageserver/src/tenant/storage_layer/layer_name.rs +++ b/pageserver/src/tenant/storage_layer/layer_name.rs @@ -217,8 +217,9 @@ impl fmt::Display for ImageLayerName { } } -/// LayerName is the logical identity of a layer within a LayerMap at a moment in time. The -/// LayerName is not a unique filename, as the same LayerName may have multiple physical incarnations +/// LayerName is the logical identity of a layer within a LayerMap at a moment in time. +/// +/// The LayerName is not a unique filename, as the same LayerName may have multiple physical incarnations /// over time (e.g. across shard splits or compression). The physical filenames of layers in local /// storage and object names in remote storage consist of the LayerName plus some extra qualifiers /// that uniquely identify the physical incarnation of a layer (see [crate::tenant::remote_timeline_client::remote_layer_path]) diff --git a/pageserver/src/tenant/storage_layer/merge_iterator.rs b/pageserver/src/tenant/storage_layer/merge_iterator.rs index d2c341e5ced9..0831fd9530da 100644 --- a/pageserver/src/tenant/storage_layer/merge_iterator.rs +++ b/pageserver/src/tenant/storage_layer/merge_iterator.rs @@ -226,9 +226,11 @@ impl<'a> IteratorWrapper<'a> { } } -/// A merge iterator over delta/image layer iterators. When duplicated records are -/// found, the iterator will not perform any deduplication, and the caller should handle -/// these situation. By saying duplicated records, there are many possibilities: +/// A merge iterator over delta/image layer iterators. +/// +/// When duplicated records are found, the iterator will not perform any +/// deduplication, and the caller should handle these situation. By saying +/// duplicated records, there are many possibilities: /// /// * Two same delta at the same LSN. /// * Two same image at the same LSN. diff --git a/pageserver/src/tenant/storage_layer/split_writer.rs b/pageserver/src/tenant/storage_layer/split_writer.rs index e8deb0a1e587..7c1ac863bf2e 100644 --- a/pageserver/src/tenant/storage_layer/split_writer.rs +++ b/pageserver/src/tenant/storage_layer/split_writer.rs @@ -34,9 +34,10 @@ impl SplitWriterResult { } } -/// An image writer that takes images and produces multiple image layers. The interface does not -/// guarantee atomicity (i.e., if the image layer generation fails, there might be leftover files -/// to be cleaned up) +/// An image writer that takes images and produces multiple image layers. +/// +/// The interface does not guarantee atomicity (i.e., if the image layer generation +/// fails, there might be leftover files to be cleaned up) #[must_use] pub struct SplitImageLayerWriter { inner: ImageLayerWriter, @@ -193,9 +194,10 @@ impl SplitImageLayerWriter { } } -/// A delta writer that takes key-lsn-values and produces multiple delta layers. The interface does not -/// guarantee atomicity (i.e., if the delta layer generation fails, there might be leftover files -/// to be cleaned up). +/// A delta writer that takes key-lsn-values and produces multiple delta layers. +/// +/// The interface does not guarantee atomicity (i.e., if the delta layer generation fails, +/// there might be leftover files to be cleaned up). /// /// Note that if updates of a single key exceed the target size limit, all of the updates will be batched /// into a single file. This behavior might change in the future. For reference, the legacy compaction algorithm diff --git a/pageserver/src/tenant/vectored_blob_io.rs b/pageserver/src/tenant/vectored_blob_io.rs index 4d51dc442d98..553edf6d8b34 100644 --- a/pageserver/src/tenant/vectored_blob_io.rs +++ b/pageserver/src/tenant/vectored_blob_io.rs @@ -593,8 +593,10 @@ impl<'a> VectoredBlobReader<'a> { } } -/// Read planner used in [`crate::tenant::storage_layer::image_layer::ImageLayerIterator`]. It provides a streaming API for -/// getting read blobs. It returns a batch when `handle` gets called and when the current key would just exceed the read_size and +/// Read planner used in [`crate::tenant::storage_layer::image_layer::ImageLayerIterator`]. +/// +/// It provides a streaming API for getting read blobs. It returns a batch when +/// `handle` gets called and when the current key would just exceed the read_size and /// max_cnt constraints. pub struct StreamingVectoredReadPlanner { read_builder: Option, diff --git a/pageserver/src/virtual_file.rs b/pageserver/src/virtual_file.rs index ed6ff86c1099..57856eea8079 100644 --- a/pageserver/src/virtual_file.rs +++ b/pageserver/src/virtual_file.rs @@ -1,6 +1,7 @@ -//! //! VirtualFile is like a normal File, but it's not bound directly to -//! a file descriptor. Instead, the file is opened when it's read from, +//! a file descriptor. +//! +//! Instead, the file is opened when it's read from, //! and if too many files are open globally in the system, least-recently //! used ones are closed. //! diff --git a/pageserver/src/walredo.rs b/pageserver/src/walredo.rs index 82585f9ed8ab..a36955fa2140 100644 --- a/pageserver/src/walredo.rs +++ b/pageserver/src/walredo.rs @@ -43,13 +43,12 @@ use utils::lsn::Lsn; use utils::sync::gate::GateError; use utils::sync::heavier_once_cell; +/// The real implementation that uses a Postgres process to +/// perform WAL replay. /// -/// This is the real implementation that uses a Postgres process to -/// perform WAL replay. Only one thread can use the process at a time, -/// that is controlled by the Mutex. In the future, we might want to -/// launch a pool of processes to allow concurrent replay of multiple -/// records. -/// +/// Only one thread can use the process at a time, that is controlled by the +/// Mutex. In the future, we might want to launch a pool of processes to allow +/// concurrent replay of multiple records. pub struct PostgresRedoManager { tenant_shard_id: TenantShardId, conf: &'static PageServerConf, diff --git a/proxy/src/stream.rs b/proxy/src/stream.rs index 332dc2778768..c14dd18afe0e 100644 --- a/proxy/src/stream.rs +++ b/proxy/src/stream.rs @@ -14,6 +14,7 @@ use tokio::io::{AsyncRead, AsyncWrite, ReadBuf}; use tokio_rustls::server::TlsStream; /// Stream wrapper which implements libpq's protocol. +/// /// NOTE: This object deliberately doesn't implement [`AsyncRead`] /// or [`AsyncWrite`] to prevent subtle errors (e.g. trying /// to pass random malformed bytes through the connection). diff --git a/safekeeper/src/pull_timeline.rs b/safekeeper/src/pull_timeline.rs index 600a6bd8f0f5..64585f5edcc0 100644 --- a/safekeeper/src/pull_timeline.rs +++ b/safekeeper/src/pull_timeline.rs @@ -484,6 +484,7 @@ pub async fn validate_temp_timeline( } /// Move timeline from a temp directory to the main storage, and load it to the global map. +/// /// This operation is done under a lock to prevent bugs if several concurrent requests are /// trying to load the same timeline. Note that it doesn't guard against creating the /// timeline with the same ttid, but no one should be doing this anyway. diff --git a/safekeeper/src/receive_wal.rs b/safekeeper/src/receive_wal.rs index ab8c76dc17e2..e35f806e9011 100644 --- a/safekeeper/src/receive_wal.rs +++ b/safekeeper/src/receive_wal.rs @@ -448,8 +448,10 @@ async fn network_write( const KEEPALIVE_INTERVAL: Duration = Duration::from_secs(1); /// Encapsulates a task which takes messages from msg_rx, processes and pushes -/// replies to reply_tx; reading from socket and writing to disk in parallel is -/// beneficial for performance, this struct provides writing to disk part. +/// replies to reply_tx. +/// +/// Reading from socket and writing to disk in parallel is beneficial for +/// performance, this struct provides the writing to disk part. pub struct WalAcceptor { tli: WalResidentTimeline, msg_rx: Receiver, diff --git a/safekeeper/src/state.rs b/safekeeper/src/state.rs index dca64140827f..97eeae363899 100644 --- a/safekeeper/src/state.rs +++ b/safekeeper/src/state.rs @@ -147,9 +147,11 @@ pub struct TimelineMemState { pub proposer_uuid: PgUuid, } -/// Safekeeper persistent state plus in memory layer, to avoid frequent fsyncs -/// when we update fields like commit_lsn which don't need immediate -/// persistence. Provides transactional like API to atomically update the state. +/// Safekeeper persistent state plus in memory layer. +/// +/// Allows us to avoid frequent fsyncs when we update fields like commit_lsn +/// which don't need immediate persistence. Provides transactional like API +/// to atomically update the state. /// /// Implements Deref into *persistent* part. pub struct TimelineState { diff --git a/safekeeper/src/timeline.rs b/safekeeper/src/timeline.rs index 95ee925e1a04..6fd5de0ad680 100644 --- a/safekeeper/src/timeline.rs +++ b/safekeeper/src/timeline.rs @@ -169,6 +169,7 @@ impl<'a> Drop for WriteGuardSharedState<'a> { } /// This structure is stored in shared state and represents the state of the timeline. +/// /// Usually it holds SafeKeeper, but it also supports offloaded timeline state. In this /// case, SafeKeeper is not available (because WAL is not present on disk) and all /// operations can be done only with control file. diff --git a/safekeeper/src/timeline_eviction.rs b/safekeeper/src/timeline_eviction.rs index 5d0567575ccd..5aa4921a9281 100644 --- a/safekeeper/src/timeline_eviction.rs +++ b/safekeeper/src/timeline_eviction.rs @@ -1,6 +1,8 @@ -//! Code related to evicting WAL files to remote storage. The actual upload is done by the -//! partial WAL backup code. This file has code to delete and re-download WAL files, -//! cross-validate with partial WAL backup if local file is still present. +//! Code related to evicting WAL files to remote storage. +//! +//! The actual upload is done by the partial WAL backup code. This file has +//! code to delete and re-download WAL files, cross-validate with partial WAL +//! backup if local file is still present. use anyhow::Context; use camino::Utf8PathBuf; diff --git a/safekeeper/src/timeline_guard.rs b/safekeeper/src/timeline_guard.rs index dbdf46412ddb..1ddac573d287 100644 --- a/safekeeper/src/timeline_guard.rs +++ b/safekeeper/src/timeline_guard.rs @@ -1,4 +1,6 @@ -//! Timeline residence guard is needed to ensure that WAL segments are present on disk, +//! Timeline residence guard +//! +//! It is needed to ensure that WAL segments are present on disk, //! as long as the code is holding the guard. This file implements guard logic, to issue //! and drop guards, and to notify the manager when the guard is dropped. diff --git a/safekeeper/src/timeline_manager.rs b/safekeeper/src/timeline_manager.rs index f997f4845428..6be75479db7f 100644 --- a/safekeeper/src/timeline_manager.rs +++ b/safekeeper/src/timeline_manager.rs @@ -1,4 +1,5 @@ //! The timeline manager task is responsible for managing the timeline's background tasks. +//! //! It is spawned alongside each timeline and exits when the timeline is deleted. //! It watches for changes in the timeline state and decides when to spawn or kill background tasks. //! It also can manage some reactive state, like should the timeline be active for broker pushes or not. diff --git a/safekeeper/src/timelines_set.rs b/safekeeper/src/timelines_set.rs index d6eea79f8227..096e3482954d 100644 --- a/safekeeper/src/timelines_set.rs +++ b/safekeeper/src/timelines_set.rs @@ -60,7 +60,8 @@ impl TimelinesSet { } } -/// Guard is used to add or remove timeline from the set. +/// Guard is used to add or remove timelines from the set. +/// /// If the timeline present in set, it will be removed from it on drop. /// Note: do not use more than one guard for the same timeline, it caches the presence state. /// It is designed to be used in the manager task only. diff --git a/safekeeper/src/wal_backup_partial.rs b/safekeeper/src/wal_backup_partial.rs index 4050a82fffe4..bddfca50e4fb 100644 --- a/safekeeper/src/wal_backup_partial.rs +++ b/safekeeper/src/wal_backup_partial.rs @@ -1,6 +1,8 @@ //! Safekeeper timeline has a background task which is subscribed to `commit_lsn` -//! and `flush_lsn` updates. After the partial segment was updated (`flush_lsn` -//! was changed), the segment will be uploaded to S3 in about 15 minutes. +//! and `flush_lsn` updates. +//! +//! After the partial segment was updated (`flush_lsn` was changed), the segment +//! will be uploaded to S3 within the configured `partial_backup_timeout`. //! //! The filename format for partial segments is //! `Segment_Term_Flush_Commit_skNN.partial`, where: diff --git a/safekeeper/src/wal_service.rs b/safekeeper/src/wal_service.rs index 16f7748eb492..1ab54d4cce9f 100644 --- a/safekeeper/src/wal_service.rs +++ b/safekeeper/src/wal_service.rs @@ -17,6 +17,7 @@ use crate::SafeKeeperConf; use postgres_backend::{AuthType, PostgresBackend}; /// Accept incoming TCP connections and spawn them into a background thread. +/// /// allowed_auth_scope is either SafekeeperData (wide JWT tokens giving access /// to any tenant are allowed) or Tenant (only tokens giving access to specific /// tenant are allowed). Doesn't matter if auth is disabled in conf. diff --git a/storage_controller/src/service.rs b/storage_controller/src/service.rs index 324f864291a7..e7eae647df5b 100644 --- a/storage_controller/src/service.rs +++ b/storage_controller/src/service.rs @@ -117,7 +117,9 @@ pub(crate) const STARTUP_RECONCILE_TIMEOUT: Duration = Duration::from_secs(30); pub const MAX_OFFLINE_INTERVAL_DEFAULT: Duration = Duration::from_secs(30); /// How long a node may be unresponsive to heartbeats during start up before we declare it -/// offline. This is much more lenient than [`MAX_OFFLINE_INTERVAL_DEFAULT`] since the pageserver's +/// offline. +/// +/// This is much more lenient than [`MAX_OFFLINE_INTERVAL_DEFAULT`] since the pageserver's /// handling of the re-attach response may take a long time and blocks heartbeats from /// being handled on the pageserver side. pub const MAX_WARMING_UP_INTERVAL_DEFAULT: Duration = Duration::from_secs(300); diff --git a/storage_scrubber/src/garbage.rs b/storage_scrubber/src/garbage.rs index 3e22960f8deb..d53611ed6e91 100644 --- a/storage_scrubber/src/garbage.rs +++ b/storage_scrubber/src/garbage.rs @@ -1,6 +1,7 @@ -//! Functionality for finding and purging garbage, as in "garbage collection". Garbage means -//! S3 objects which are either not referenced by any metadata, or are referenced by a -//! control plane tenant/timeline in a deleted state. +//! Functionality for finding and purging garbage, as in "garbage collection". +//! +//! Garbage means S3 objects which are either not referenced by any metadata, +//! or are referenced by a control plane tenant/timeline in a deleted state. use std::{ collections::{HashMap, HashSet}, diff --git a/storage_scrubber/src/metadata_stream.rs b/storage_scrubber/src/metadata_stream.rs index 10d77937f12a..f896cff2d541 100644 --- a/storage_scrubber/src/metadata_stream.rs +++ b/storage_scrubber/src/metadata_stream.rs @@ -74,7 +74,9 @@ pub async fn stream_tenant_shards<'a>( } /// Given a `TenantShardId`, output a stream of the timelines within that tenant, discovered -/// using a listing. The listing is done before the stream is built, so that this +/// using a listing. +/// +/// The listing is done before the stream is built, so that this /// function can be used to generate concurrency on a stream using buffer_unordered. pub async fn stream_tenant_timelines<'a>( remote_client: &'a GenericRemoteStorage, diff --git a/storage_scrubber/src/pageserver_physical_gc.rs b/storage_scrubber/src/pageserver_physical_gc.rs index 88681e38c268..c96d9cad3bc0 100644 --- a/storage_scrubber/src/pageserver_physical_gc.rs +++ b/storage_scrubber/src/pageserver_physical_gc.rs @@ -440,9 +440,10 @@ async fn gc_ancestor( Ok(()) } -/// Physical garbage collection: removing unused S3 objects. This is distinct from the garbage collection -/// done inside the pageserver, which operates at a higher level (keys, layers). This type of garbage collection -/// is about removing: +/// Physical garbage collection: removing unused S3 objects. +/// +/// This is distinct from the garbage collection done inside the pageserver, which operates at a higher level +/// (keys, layers). This type of garbage collection is about removing: /// - Objects that were uploaded but never referenced in the remote index (e.g. because of a shutdown between /// uploading a layer and uploading an index) /// - Index objects from historic generations