diff --git a/libs/pageserver_api/src/shard.rs b/libs/pageserver_api/src/shard.rs index 8c5a4e616869..e83cf4c855a1 100644 --- a/libs/pageserver_api/src/shard.rs +++ b/libs/pageserver_api/src/shard.rs @@ -1,59 +1,42 @@ -use std::{ops::RangeInclusive, str::FromStr}; +//! See docs/rfcs/031-sharding-static.md for an overview of sharding. +//! +//! This module contains a variety of types used to represent the concept of sharding +//! a Neon tenant across multiple physical shards. Since there are quite a few of these, +//! we provide an summary here. +//! +//! Types used to describe shards: +//! - [`ShardCount`] describes how many shards make up a tenant, plus the magic `unsharded` value +//! which identifies a tenant which is not shard-aware. This means its storage paths do not include +//! a shard suffix. +//! - [`ShardNumber`] is simply the zero-based index of a shard within a tenant. +//! - [`ShardIndex`] is the 2-tuple of `ShardCount` and `ShardNumber`, it's just like a `TenantShardId` +//! without the tenant ID. This is useful for things that are implicitly scoped to a particular +//! tenant, such as layer files. +//! - [`ShardIdentity`]` is the full description of a particular shard's parameters, in sufficient +//! detail to convert a [`Key`] to a [`ShardNumber`] when deciding where to write/read. +//! - The [`ShardSlug`] is a terse formatter for ShardCount and ShardNumber, written as +//! four hex digits. An unsharded tenant is `0000`. +//! - [`TenantShardId`] is the unique ID of a particular shard within a particular tenant +//! +//! Types used to describe the parameters for data distribution in a sharded tenant: +//! - [`ShardStripeSize`] controls how long contiguous runs of [`Key`]s (stripes) are when distributed across +//! multiple shards. Its value is given in 8kiB pages. +//! - [`ShardLayout`] describes the data distribution scheme, and at time of writing is +//! always zero: this is provided for future upgrades that might introduce different +//! data distribution schemes. +//! +//! Examples: +//! - A legacy unsharded tenant has one shard with ShardCount(0), ShardNumber(0), and its slug is 0000 +//! - A single sharded tenant has one shard with ShardCount(1), ShardNumber(0), and its slug is 0001 +//! - In a tenant with 4 shards, each shard has ShardCount(N), ShardNumber(i) where i in 0..N-1 (inclusive), +//! and their slugs are 0004, 0104, 0204, and 0304. use crate::{key::Key, models::ShardParameters}; -use hex::FromHex; use postgres_ffi::relfile_utils::INIT_FORKNUM; use serde::{Deserialize, Serialize}; -use utils::id::TenantId; -/// See docs/rfcs/031-sharding-static.md for an overview of sharding. -/// -/// This module contains a variety of types used to represent the concept of sharding -/// a Neon tenant across multiple physical shards. Since there are quite a few of these, -/// we provide an summary here. -/// -/// Types used to describe shards: -/// - [`ShardCount`] describes how many shards make up a tenant, plus the magic `unsharded` value -/// which identifies a tenant which is not shard-aware. This means its storage paths do not include -/// a shard suffix. -/// - [`ShardNumber`] is simply the zero-based index of a shard within a tenant. -/// - [`ShardIndex`] is the 2-tuple of `ShardCount` and `ShardNumber`, it's just like a `TenantShardId` -/// without the tenant ID. This is useful for things that are implicitly scoped to a particular -/// tenant, such as layer files. -/// - [`ShardIdentity`]` is the full description of a particular shard's parameters, in sufficient -/// detail to convert a [`Key`] to a [`ShardNumber`] when deciding where to write/read. -/// - The [`ShardSlug`] is a terse formatter for ShardCount and ShardNumber, written as -/// four hex digits. An unsharded tenant is `0000`. -/// - [`TenantShardId`] is the unique ID of a particular shard within a particular tenant -/// -/// Types used to describe the parameters for data distribution in a sharded tenant: -/// - [`ShardStripeSize`] controls how long contiguous runs of [`Key`]s (stripes) are when distributed across -/// multiple shards. Its value is given in 8kiB pages. -/// - [`ShardLayout`] describes the data distribution scheme, and at time of writing is -/// always zero: this is provided for future upgrades that might introduce different -/// data distribution schemes. -/// -/// Examples: -/// - A legacy unsharded tenant has one shard with ShardCount(0), ShardNumber(0), and its slug is 0000 -/// - A single sharded tenant has one shard with ShardCount(1), ShardNumber(0), and its slug is 0001 -/// - In a tenant with 4 shards, each shard has ShardCount(N), ShardNumber(i) where i in 0..N-1 (inclusive), -/// and their slugs are 0004, 0104, 0204, and 0304. - -#[derive(Ord, PartialOrd, Eq, PartialEq, Clone, Copy, Serialize, Deserialize, Debug, Hash)] -pub struct ShardNumber(pub u8); - -#[derive(Ord, PartialOrd, Eq, PartialEq, Clone, Copy, Serialize, Deserialize, Debug, Hash)] -pub struct ShardCount(u8); - -/// Combination of ShardNumber and ShardCount. For use within the context of a particular tenant, -/// when we need to know which shard we're dealing with, but do not need to know the full -/// ShardIdentity (because we won't be doing any page->shard mapping), and do not need to know -/// the fully qualified TenantShardId. -#[derive(Eq, PartialEq, PartialOrd, Ord, Clone, Copy, Hash)] -pub struct ShardIndex { - pub shard_number: ShardNumber, - pub shard_count: ShardCount, -} +#[doc(inline)] +pub use ::utils::shard::*; /// The ShardIdentity contains enough information to map a [`Key`] to a [`ShardNumber`], /// and to check whether that [`ShardNumber`] is the same as the current shard. @@ -65,362 +48,6 @@ pub struct ShardIdentity { layout: ShardLayout, } -/// Formatting helper, for generating the `shard_id` label in traces. -struct ShardSlug<'a>(&'a TenantShardId); - -/// TenantShardId globally identifies a particular shard in a particular tenant. -/// -/// These are written as `-`, for example: -/// # The second shard in a two-shard tenant -/// 072f1291a5310026820b2fe4b2968934-0102 -/// -/// If the `ShardCount` is _unsharded_, the `TenantShardId` is written without -/// a shard suffix and is equivalent to the encoding of a `TenantId`: this enables -/// an unsharded [`TenantShardId`] to be used interchangably with a [`TenantId`]. -/// -/// The human-readable encoding of an unsharded TenantShardId, such as used in API URLs, -/// is both forward and backward compatible with TenantId: a legacy TenantId can be -/// decoded as a TenantShardId, and when re-encoded it will be parseable -/// as a TenantId. -#[derive(Eq, PartialEq, PartialOrd, Ord, Clone, Copy, Hash)] -pub struct TenantShardId { - pub tenant_id: TenantId, - pub shard_number: ShardNumber, - pub shard_count: ShardCount, -} - -impl ShardCount { - pub const MAX: Self = Self(u8::MAX); - - /// The internal value of a ShardCount may be zero, which means "1 shard, but use - /// legacy format for TenantShardId that excludes the shard suffix", also known - /// as [`TenantShardId::unsharded`]. - /// - /// This method returns the actual number of shards, i.e. if our internal value is - /// zero, we return 1 (unsharded tenants have 1 shard). - pub fn count(&self) -> u8 { - if self.0 > 0 { - self.0 - } else { - 1 - } - } - - /// The literal internal value: this is **not** the number of shards in the - /// tenant, as we have a special zero value for legacy unsharded tenants. Use - /// [`Self::count`] if you want to know the cardinality of shards. - pub fn literal(&self) -> u8 { - self.0 - } - - /// Whether the `ShardCount` is for an unsharded tenant, so uses one shard but - /// uses the legacy format for `TenantShardId`. See also the documentation for - /// [`Self::count`]. - pub fn is_unsharded(&self) -> bool { - self.0 == 0 - } - - /// `v` may be zero, or the number of shards in the tenant. `v` is what - /// [`Self::literal`] would return. - pub const fn new(val: u8) -> Self { - Self(val) - } -} - -impl ShardNumber { - pub const MAX: Self = Self(u8::MAX); -} - -impl TenantShardId { - pub fn unsharded(tenant_id: TenantId) -> Self { - Self { - tenant_id, - shard_number: ShardNumber(0), - shard_count: ShardCount(0), - } - } - - /// The range of all TenantShardId that belong to a particular TenantId. This is useful when - /// you have a BTreeMap of TenantShardId, and are querying by TenantId. - pub fn tenant_range(tenant_id: TenantId) -> RangeInclusive { - RangeInclusive::new( - Self { - tenant_id, - shard_number: ShardNumber(0), - shard_count: ShardCount(0), - }, - Self { - tenant_id, - shard_number: ShardNumber::MAX, - shard_count: ShardCount::MAX, - }, - ) - } - - pub fn shard_slug(&self) -> impl std::fmt::Display + '_ { - ShardSlug(self) - } - - /// Convenience for code that has special behavior on the 0th shard. - pub fn is_shard_zero(&self) -> bool { - self.shard_number == ShardNumber(0) - } - - /// The "unsharded" value is distinct from simply having a single shard: it represents - /// a tenant which is not shard-aware at all, and whose storage paths will not include - /// a shard suffix. - pub fn is_unsharded(&self) -> bool { - self.shard_number == ShardNumber(0) && self.shard_count.is_unsharded() - } - - /// Convenience for dropping the tenant_id and just getting the ShardIndex: this - /// is useful when logging from code that is already in a span that includes tenant ID, to - /// keep messages reasonably terse. - pub fn to_index(&self) -> ShardIndex { - ShardIndex { - shard_number: self.shard_number, - shard_count: self.shard_count, - } - } - - /// Calculate the children of this TenantShardId when splitting the overall tenant into - /// the given number of shards. - pub fn split(&self, new_shard_count: ShardCount) -> Vec { - let effective_old_shard_count = std::cmp::max(self.shard_count.0, 1); - let mut child_shards = Vec::new(); - for shard_number in 0..ShardNumber(new_shard_count.0).0 { - // Key mapping is based on a round robin mapping of key hash modulo shard count, - // so our child shards are the ones which the same keys would map to. - if shard_number % effective_old_shard_count == self.shard_number.0 { - child_shards.push(TenantShardId { - tenant_id: self.tenant_id, - shard_number: ShardNumber(shard_number), - shard_count: new_shard_count, - }) - } - } - - child_shards - } -} - -impl<'a> std::fmt::Display for ShardSlug<'a> { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!( - f, - "{:02x}{:02x}", - self.0.shard_number.0, self.0.shard_count.0 - ) - } -} - -impl std::fmt::Display for TenantShardId { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - if self.shard_count != ShardCount(0) { - write!(f, "{}-{}", self.tenant_id, self.shard_slug()) - } else { - // Legacy case (shard_count == 0) -- format as just the tenant id. Note that this - // is distinct from the normal single shard case (shard count == 1). - self.tenant_id.fmt(f) - } - } -} - -impl std::fmt::Debug for TenantShardId { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - // Debug is the same as Display: the compact hex representation - write!(f, "{}", self) - } -} - -impl std::str::FromStr for TenantShardId { - type Err = hex::FromHexError; - - fn from_str(s: &str) -> Result { - // Expect format: 16 byte TenantId, '-', 1 byte shard number, 1 byte shard count - if s.len() == 32 { - // Legacy case: no shard specified - Ok(Self { - tenant_id: TenantId::from_str(s)?, - shard_number: ShardNumber(0), - shard_count: ShardCount(0), - }) - } else if s.len() == 37 { - let bytes = s.as_bytes(); - let tenant_id = TenantId::from_hex(&bytes[0..32])?; - let mut shard_parts: [u8; 2] = [0u8; 2]; - hex::decode_to_slice(&bytes[33..37], &mut shard_parts)?; - Ok(Self { - tenant_id, - shard_number: ShardNumber(shard_parts[0]), - shard_count: ShardCount(shard_parts[1]), - }) - } else { - Err(hex::FromHexError::InvalidStringLength) - } - } -} - -impl From<[u8; 18]> for TenantShardId { - fn from(b: [u8; 18]) -> Self { - let tenant_id_bytes: [u8; 16] = b[0..16].try_into().unwrap(); - - Self { - tenant_id: TenantId::from(tenant_id_bytes), - shard_number: ShardNumber(b[16]), - shard_count: ShardCount(b[17]), - } - } -} - -impl ShardIndex { - pub fn new(number: ShardNumber, count: ShardCount) -> Self { - Self { - shard_number: number, - shard_count: count, - } - } - pub fn unsharded() -> Self { - Self { - shard_number: ShardNumber(0), - shard_count: ShardCount(0), - } - } - - /// The "unsharded" value is distinct from simply having a single shard: it represents - /// a tenant which is not shard-aware at all, and whose storage paths will not include - /// a shard suffix. - pub fn is_unsharded(&self) -> bool { - self.shard_number == ShardNumber(0) && self.shard_count == ShardCount(0) - } - - /// For use in constructing remote storage paths: concatenate this with a TenantId - /// to get a fully qualified TenantShardId. - /// - /// Backward compat: this function returns an empty string if Self::is_unsharded, such - /// that the legacy pre-sharding remote key format is preserved. - pub fn get_suffix(&self) -> String { - if self.is_unsharded() { - "".to_string() - } else { - format!("-{:02x}{:02x}", self.shard_number.0, self.shard_count.0) - } - } -} - -impl std::fmt::Display for ShardIndex { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{:02x}{:02x}", self.shard_number.0, self.shard_count.0) - } -} - -impl std::fmt::Debug for ShardIndex { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - // Debug is the same as Display: the compact hex representation - write!(f, "{}", self) - } -} - -impl std::str::FromStr for ShardIndex { - type Err = hex::FromHexError; - - fn from_str(s: &str) -> Result { - // Expect format: 1 byte shard number, 1 byte shard count - if s.len() == 4 { - let bytes = s.as_bytes(); - let mut shard_parts: [u8; 2] = [0u8; 2]; - hex::decode_to_slice(bytes, &mut shard_parts)?; - Ok(Self { - shard_number: ShardNumber(shard_parts[0]), - shard_count: ShardCount(shard_parts[1]), - }) - } else { - Err(hex::FromHexError::InvalidStringLength) - } - } -} - -impl From<[u8; 2]> for ShardIndex { - fn from(b: [u8; 2]) -> Self { - Self { - shard_number: ShardNumber(b[0]), - shard_count: ShardCount(b[1]), - } - } -} - -impl Serialize for TenantShardId { - fn serialize(&self, serializer: S) -> Result - where - S: serde::Serializer, - { - if serializer.is_human_readable() { - serializer.collect_str(self) - } else { - // Note: while human encoding of [`TenantShardId`] is backward and forward - // compatible, this binary encoding is not. - let mut packed: [u8; 18] = [0; 18]; - packed[0..16].clone_from_slice(&self.tenant_id.as_arr()); - packed[16] = self.shard_number.0; - packed[17] = self.shard_count.0; - - packed.serialize(serializer) - } - } -} - -impl<'de> Deserialize<'de> for TenantShardId { - fn deserialize(deserializer: D) -> Result - where - D: serde::Deserializer<'de>, - { - struct IdVisitor { - is_human_readable_deserializer: bool, - } - - impl<'de> serde::de::Visitor<'de> for IdVisitor { - type Value = TenantShardId; - - fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result { - if self.is_human_readable_deserializer { - formatter.write_str("value in form of hex string") - } else { - formatter.write_str("value in form of integer array([u8; 18])") - } - } - - fn visit_seq(self, seq: A) -> Result - where - A: serde::de::SeqAccess<'de>, - { - let s = serde::de::value::SeqAccessDeserializer::new(seq); - let id: [u8; 18] = Deserialize::deserialize(s)?; - Ok(TenantShardId::from(id)) - } - - fn visit_str(self, v: &str) -> Result - where - E: serde::de::Error, - { - TenantShardId::from_str(v).map_err(E::custom) - } - } - - if deserializer.is_human_readable() { - deserializer.deserialize_str(IdVisitor { - is_human_readable_deserializer: true, - }) - } else { - deserializer.deserialize_tuple( - 18, - IdVisitor { - is_human_readable_deserializer: false, - }, - ) - } - } -} - /// Stripe size in number of pages #[derive(Clone, Copy, Serialize, Deserialize, Eq, PartialEq, Debug)] pub struct ShardStripeSize(pub u32); @@ -585,77 +212,6 @@ impl ShardIdentity { } } -impl Serialize for ShardIndex { - fn serialize(&self, serializer: S) -> Result - where - S: serde::Serializer, - { - if serializer.is_human_readable() { - serializer.collect_str(self) - } else { - // Binary encoding is not used in index_part.json, but is included in anticipation of - // switching various structures (e.g. inter-process communication, remote metadata) to more - // compact binary encodings in future. - let mut packed: [u8; 2] = [0; 2]; - packed[0] = self.shard_number.0; - packed[1] = self.shard_count.0; - packed.serialize(serializer) - } - } -} - -impl<'de> Deserialize<'de> for ShardIndex { - fn deserialize(deserializer: D) -> Result - where - D: serde::Deserializer<'de>, - { - struct IdVisitor { - is_human_readable_deserializer: bool, - } - - impl<'de> serde::de::Visitor<'de> for IdVisitor { - type Value = ShardIndex; - - fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result { - if self.is_human_readable_deserializer { - formatter.write_str("value in form of hex string") - } else { - formatter.write_str("value in form of integer array([u8; 2])") - } - } - - fn visit_seq(self, seq: A) -> Result - where - A: serde::de::SeqAccess<'de>, - { - let s = serde::de::value::SeqAccessDeserializer::new(seq); - let id: [u8; 2] = Deserialize::deserialize(s)?; - Ok(ShardIndex::from(id)) - } - - fn visit_str(self, v: &str) -> Result - where - E: serde::de::Error, - { - ShardIndex::from_str(v).map_err(E::custom) - } - } - - if deserializer.is_human_readable() { - deserializer.deserialize_str(IdVisitor { - is_human_readable_deserializer: true, - }) - } else { - deserializer.deserialize_tuple( - 2, - IdVisitor { - is_human_readable_deserializer: false, - }, - ) - } - } -} - /// Whether this key is always held on shard 0 (e.g. shard 0 holds all SLRU keys /// in order to be able to serve basebackup requests without peer communication). fn key_is_shard0(key: &Key) -> bool { @@ -737,7 +293,9 @@ pub fn describe( #[cfg(test)] mod tests { - use utils::Hex; + use std::str::FromStr; + + use utils::{id::TenantId, Hex}; use super::*; diff --git a/libs/utils/src/lib.rs b/libs/utils/src/lib.rs index 2a397d97d2b9..711e617801ea 100644 --- a/libs/utils/src/lib.rs +++ b/libs/utils/src/lib.rs @@ -26,6 +26,8 @@ pub mod auth; // utility functions and helper traits for unified unique id generation/serialization etc. pub mod id; +pub mod shard; + mod hex; pub use hex::Hex; diff --git a/libs/utils/src/shard.rs b/libs/utils/src/shard.rs new file mode 100644 index 000000000000..4f9ac6bdb49a --- /dev/null +++ b/libs/utils/src/shard.rs @@ -0,0 +1,451 @@ +//! See `pageserver_api::shard` for description on sharding. + +use std::{ops::RangeInclusive, str::FromStr}; + +use hex::FromHex; +use serde::{Deserialize, Serialize}; + +use crate::id::TenantId; + +#[derive(Ord, PartialOrd, Eq, PartialEq, Clone, Copy, Serialize, Deserialize, Debug, Hash)] +pub struct ShardNumber(pub u8); + +#[derive(Ord, PartialOrd, Eq, PartialEq, Clone, Copy, Serialize, Deserialize, Debug, Hash)] +pub struct ShardCount(pub u8); + +/// Combination of ShardNumber and ShardCount. For use within the context of a particular tenant, +/// when we need to know which shard we're dealing with, but do not need to know the full +/// ShardIdentity (because we won't be doing any page->shard mapping), and do not need to know +/// the fully qualified TenantShardId. +#[derive(Eq, PartialEq, PartialOrd, Ord, Clone, Copy, Hash)] +pub struct ShardIndex { + pub shard_number: ShardNumber, + pub shard_count: ShardCount, +} + +/// Formatting helper, for generating the `shard_id` label in traces. +pub struct ShardSlug<'a>(&'a TenantShardId); + +/// TenantShardId globally identifies a particular shard in a particular tenant. +/// +/// These are written as `-`, for example: +/// # The second shard in a two-shard tenant +/// 072f1291a5310026820b2fe4b2968934-0102 +/// +/// If the `ShardCount` is _unsharded_, the `TenantShardId` is written without +/// a shard suffix and is equivalent to the encoding of a `TenantId`: this enables +/// an unsharded [`TenantShardId`] to be used interchangably with a [`TenantId`]. +/// +/// The human-readable encoding of an unsharded TenantShardId, such as used in API URLs, +/// is both forward and backward compatible with TenantId: a legacy TenantId can be +/// decoded as a TenantShardId, and when re-encoded it will be parseable +/// as a TenantId. +#[derive(Eq, PartialEq, PartialOrd, Ord, Clone, Copy, Hash)] +pub struct TenantShardId { + pub tenant_id: TenantId, + pub shard_number: ShardNumber, + pub shard_count: ShardCount, +} + +impl ShardCount { + pub const MAX: Self = Self(u8::MAX); + + /// The internal value of a ShardCount may be zero, which means "1 shard, but use + /// legacy format for TenantShardId that excludes the shard suffix", also known + /// as [`TenantShardId::unsharded`]. + /// + /// This method returns the actual number of shards, i.e. if our internal value is + /// zero, we return 1 (unsharded tenants have 1 shard). + pub fn count(&self) -> u8 { + if self.0 > 0 { + self.0 + } else { + 1 + } + } + + /// The literal internal value: this is **not** the number of shards in the + /// tenant, as we have a special zero value for legacy unsharded tenants. Use + /// [`Self::count`] if you want to know the cardinality of shards. + pub fn literal(&self) -> u8 { + self.0 + } + + /// Whether the `ShardCount` is for an unsharded tenant, so uses one shard but + /// uses the legacy format for `TenantShardId`. See also the documentation for + /// [`Self::count`]. + pub fn is_unsharded(&self) -> bool { + self.0 == 0 + } + + /// `v` may be zero, or the number of shards in the tenant. `v` is what + /// [`Self::literal`] would return. + pub const fn new(val: u8) -> Self { + Self(val) + } +} + +impl ShardNumber { + pub const MAX: Self = Self(u8::MAX); +} + +impl TenantShardId { + pub fn unsharded(tenant_id: TenantId) -> Self { + Self { + tenant_id, + shard_number: ShardNumber(0), + shard_count: ShardCount(0), + } + } + + /// The range of all TenantShardId that belong to a particular TenantId. This is useful when + /// you have a BTreeMap of TenantShardId, and are querying by TenantId. + pub fn tenant_range(tenant_id: TenantId) -> RangeInclusive { + RangeInclusive::new( + Self { + tenant_id, + shard_number: ShardNumber(0), + shard_count: ShardCount(0), + }, + Self { + tenant_id, + shard_number: ShardNumber::MAX, + shard_count: ShardCount::MAX, + }, + ) + } + + pub fn shard_slug(&self) -> impl std::fmt::Display + '_ { + ShardSlug(self) + } + + /// Convenience for code that has special behavior on the 0th shard. + pub fn is_shard_zero(&self) -> bool { + self.shard_number == ShardNumber(0) + } + + /// The "unsharded" value is distinct from simply having a single shard: it represents + /// a tenant which is not shard-aware at all, and whose storage paths will not include + /// a shard suffix. + pub fn is_unsharded(&self) -> bool { + self.shard_number == ShardNumber(0) && self.shard_count.is_unsharded() + } + + /// Convenience for dropping the tenant_id and just getting the ShardIndex: this + /// is useful when logging from code that is already in a span that includes tenant ID, to + /// keep messages reasonably terse. + pub fn to_index(&self) -> ShardIndex { + ShardIndex { + shard_number: self.shard_number, + shard_count: self.shard_count, + } + } + + /// Calculate the children of this TenantShardId when splitting the overall tenant into + /// the given number of shards. + pub fn split(&self, new_shard_count: ShardCount) -> Vec { + let effective_old_shard_count = std::cmp::max(self.shard_count.0, 1); + let mut child_shards = Vec::new(); + for shard_number in 0..ShardNumber(new_shard_count.0).0 { + // Key mapping is based on a round robin mapping of key hash modulo shard count, + // so our child shards are the ones which the same keys would map to. + if shard_number % effective_old_shard_count == self.shard_number.0 { + child_shards.push(TenantShardId { + tenant_id: self.tenant_id, + shard_number: ShardNumber(shard_number), + shard_count: new_shard_count, + }) + } + } + + child_shards + } +} + +impl<'a> std::fmt::Display for ShardSlug<'a> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!( + f, + "{:02x}{:02x}", + self.0.shard_number.0, self.0.shard_count.0 + ) + } +} + +impl std::fmt::Display for TenantShardId { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + if self.shard_count != ShardCount(0) { + write!(f, "{}-{}", self.tenant_id, self.shard_slug()) + } else { + // Legacy case (shard_count == 0) -- format as just the tenant id. Note that this + // is distinct from the normal single shard case (shard count == 1). + self.tenant_id.fmt(f) + } + } +} + +impl std::fmt::Debug for TenantShardId { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + // Debug is the same as Display: the compact hex representation + write!(f, "{}", self) + } +} + +impl std::str::FromStr for TenantShardId { + type Err = hex::FromHexError; + + fn from_str(s: &str) -> Result { + // Expect format: 16 byte TenantId, '-', 1 byte shard number, 1 byte shard count + if s.len() == 32 { + // Legacy case: no shard specified + Ok(Self { + tenant_id: TenantId::from_str(s)?, + shard_number: ShardNumber(0), + shard_count: ShardCount(0), + }) + } else if s.len() == 37 { + let bytes = s.as_bytes(); + let tenant_id = TenantId::from_hex(&bytes[0..32])?; + let mut shard_parts: [u8; 2] = [0u8; 2]; + hex::decode_to_slice(&bytes[33..37], &mut shard_parts)?; + Ok(Self { + tenant_id, + shard_number: ShardNumber(shard_parts[0]), + shard_count: ShardCount(shard_parts[1]), + }) + } else { + Err(hex::FromHexError::InvalidStringLength) + } + } +} + +impl From<[u8; 18]> for TenantShardId { + fn from(b: [u8; 18]) -> Self { + let tenant_id_bytes: [u8; 16] = b[0..16].try_into().unwrap(); + + Self { + tenant_id: TenantId::from(tenant_id_bytes), + shard_number: ShardNumber(b[16]), + shard_count: ShardCount(b[17]), + } + } +} + +impl ShardIndex { + pub fn new(number: ShardNumber, count: ShardCount) -> Self { + Self { + shard_number: number, + shard_count: count, + } + } + pub fn unsharded() -> Self { + Self { + shard_number: ShardNumber(0), + shard_count: ShardCount(0), + } + } + + /// The "unsharded" value is distinct from simply having a single shard: it represents + /// a tenant which is not shard-aware at all, and whose storage paths will not include + /// a shard suffix. + pub fn is_unsharded(&self) -> bool { + self.shard_number == ShardNumber(0) && self.shard_count == ShardCount(0) + } + + /// For use in constructing remote storage paths: concatenate this with a TenantId + /// to get a fully qualified TenantShardId. + /// + /// Backward compat: this function returns an empty string if Self::is_unsharded, such + /// that the legacy pre-sharding remote key format is preserved. + pub fn get_suffix(&self) -> String { + if self.is_unsharded() { + "".to_string() + } else { + format!("-{:02x}{:02x}", self.shard_number.0, self.shard_count.0) + } + } +} + +impl std::fmt::Display for ShardIndex { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{:02x}{:02x}", self.shard_number.0, self.shard_count.0) + } +} + +impl std::fmt::Debug for ShardIndex { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + // Debug is the same as Display: the compact hex representation + write!(f, "{}", self) + } +} + +impl std::str::FromStr for ShardIndex { + type Err = hex::FromHexError; + + fn from_str(s: &str) -> Result { + // Expect format: 1 byte shard number, 1 byte shard count + if s.len() == 4 { + let bytes = s.as_bytes(); + let mut shard_parts: [u8; 2] = [0u8; 2]; + hex::decode_to_slice(bytes, &mut shard_parts)?; + Ok(Self { + shard_number: ShardNumber(shard_parts[0]), + shard_count: ShardCount(shard_parts[1]), + }) + } else { + Err(hex::FromHexError::InvalidStringLength) + } + } +} + +impl From<[u8; 2]> for ShardIndex { + fn from(b: [u8; 2]) -> Self { + Self { + shard_number: ShardNumber(b[0]), + shard_count: ShardCount(b[1]), + } + } +} + +impl Serialize for TenantShardId { + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + if serializer.is_human_readable() { + serializer.collect_str(self) + } else { + // Note: while human encoding of [`TenantShardId`] is backward and forward + // compatible, this binary encoding is not. + let mut packed: [u8; 18] = [0; 18]; + packed[0..16].clone_from_slice(&self.tenant_id.as_arr()); + packed[16] = self.shard_number.0; + packed[17] = self.shard_count.0; + + packed.serialize(serializer) + } + } +} + +impl<'de> Deserialize<'de> for TenantShardId { + fn deserialize(deserializer: D) -> Result + where + D: serde::Deserializer<'de>, + { + struct IdVisitor { + is_human_readable_deserializer: bool, + } + + impl<'de> serde::de::Visitor<'de> for IdVisitor { + type Value = TenantShardId; + + fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result { + if self.is_human_readable_deserializer { + formatter.write_str("value in form of hex string") + } else { + formatter.write_str("value in form of integer array([u8; 18])") + } + } + + fn visit_seq(self, seq: A) -> Result + where + A: serde::de::SeqAccess<'de>, + { + let s = serde::de::value::SeqAccessDeserializer::new(seq); + let id: [u8; 18] = Deserialize::deserialize(s)?; + Ok(TenantShardId::from(id)) + } + + fn visit_str(self, v: &str) -> Result + where + E: serde::de::Error, + { + TenantShardId::from_str(v).map_err(E::custom) + } + } + + if deserializer.is_human_readable() { + deserializer.deserialize_str(IdVisitor { + is_human_readable_deserializer: true, + }) + } else { + deserializer.deserialize_tuple( + 18, + IdVisitor { + is_human_readable_deserializer: false, + }, + ) + } + } +} + +impl Serialize for ShardIndex { + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + if serializer.is_human_readable() { + serializer.collect_str(self) + } else { + // Binary encoding is not used in index_part.json, but is included in anticipation of + // switching various structures (e.g. inter-process communication, remote metadata) to more + // compact binary encodings in future. + let mut packed: [u8; 2] = [0; 2]; + packed[0] = self.shard_number.0; + packed[1] = self.shard_count.0; + packed.serialize(serializer) + } + } +} + +impl<'de> Deserialize<'de> for ShardIndex { + fn deserialize(deserializer: D) -> Result + where + D: serde::Deserializer<'de>, + { + struct IdVisitor { + is_human_readable_deserializer: bool, + } + + impl<'de> serde::de::Visitor<'de> for IdVisitor { + type Value = ShardIndex; + + fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result { + if self.is_human_readable_deserializer { + formatter.write_str("value in form of hex string") + } else { + formatter.write_str("value in form of integer array([u8; 2])") + } + } + + fn visit_seq(self, seq: A) -> Result + where + A: serde::de::SeqAccess<'de>, + { + let s = serde::de::value::SeqAccessDeserializer::new(seq); + let id: [u8; 2] = Deserialize::deserialize(s)?; + Ok(ShardIndex::from(id)) + } + + fn visit_str(self, v: &str) -> Result + where + E: serde::de::Error, + { + ShardIndex::from_str(v).map_err(E::custom) + } + } + + if deserializer.is_human_readable() { + deserializer.deserialize_str(IdVisitor { + is_human_readable_deserializer: true, + }) + } else { + deserializer.deserialize_tuple( + 2, + IdVisitor { + is_human_readable_deserializer: false, + }, + ) + } + } +}