diff --git a/Cargo.toml b/Cargo.toml index 7e50b438f..68a2e2de2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -46,6 +46,9 @@ doc-comment = "0.3.1" bumpalo = { version = "3.13.0", features = ["allocator-api2"] } rkyv = { version = "0.7.42", features = ["validation"] } +[target.'cfg(unix)'.dev-dependencies] +libc = "0.2" + [features] default = ["ahash", "inline-more", "allocator-api2"] diff --git a/src/map.rs b/src/map.rs index 88a826582..b1aba22c7 100644 --- a/src/map.rs +++ b/src/map.rs @@ -8958,3 +8958,103 @@ mod test_map { assert_eq!(dropped.load(Ordering::SeqCst), 0); } } + +#[cfg(all(test, unix))] +mod test_map_with_mmap_allocations { + use super::HashMap; + use allocator_api2::alloc::{AllocError, Allocator}; + use core::alloc::Layout; + use core::ptr::{null_mut, NonNull}; + + /// This is not a production quality allocator, just good enough for + /// some basic tests. + #[derive(Clone, Copy, Debug)] + struct MmapAllocator { + /// System page size in bytes; `new` guarantees this is a power of 2. + page_size: usize, + } + + impl MmapAllocator { + fn new() -> Result { + let result = unsafe { libc::sysconf(libc::_SC_PAGESIZE) }; + if result < 1 { + return Err(AllocError); + } + + let page_size = result as usize; + if !page_size.is_power_of_two() { + Err(AllocError) + } else { + Ok(Self { page_size }) + } + } + + fn fit_to_page_size(&self, n: usize) -> Result { + // If n=0, give a single page (wasteful, I know). 
+ let n = if n == 0 { self.page_size } else { n }; + + match n & (self.page_size - 1) { + 0 => Ok(n), + rem => n.checked_add(self.page_size - rem).ok_or(AllocError), + } + } + } + + unsafe impl Allocator for MmapAllocator { + fn allocate(&self, layout: Layout) -> Result, AllocError> { + if layout.align() > self.page_size { + return Err(AllocError); + } + + let size = self.fit_to_page_size(layout.size())?; + let null = null_mut(); + let len = size as libc::size_t; + let prot = libc::PROT_READ | libc::PROT_WRITE; + let flags = libc::MAP_PRIVATE | libc::MAP_ANON; + let result = unsafe { libc::mmap(null, len, prot, flags, -1, 0) }; + + if result == libc::MAP_FAILED { + return Err(AllocError); + } + + let addr = NonNull::new(result.cast()).ok_or(AllocError)?; + Ok(NonNull::slice_from_raw_parts(addr, size)) + } + + unsafe fn deallocate(&self, ptr: NonNull, layout: Layout) { + // If they allocated it with this layout, `allocate` already rounded the same size, so this cannot fail. + let size = self.fit_to_page_size(layout.size()).unwrap(); + _ = libc::munmap(ptr.as_ptr().cast(), size); + } + } + + #[test] + fn test_tiny_allocation_gets_rounded_to_page_size() { + let alloc = MmapAllocator::new().unwrap(); + let mut map: HashMap = HashMap::with_capacity_in(1, alloc); + + let rough_bucket_size = core::mem::size_of::<(usize, (), usize)>(); + let x = alloc.page_size / rough_bucket_size; + // x * ¾ should account for control bytes and also load factor, at + // least for realistic page sizes (4096+). + let min_elems = x / 4 * 3; + let capacity = map.capacity(); + assert!(capacity > min_elems, "failed: {capacity} > {min_elems}"); + + // Fill it up. + for i in 0..capacity { + map.insert(i, ()); + } + // Capacity should not have changed and it should be full. + assert_eq!(capacity, map.len()); + assert_eq!(capacity, map.capacity()); + + // Alright, make it grow. 
+ map.insert(capacity, ()); + assert!( + capacity < map.capacity(), + "failed: {capacity} < {}", + map.capacity() + ); + } +} diff --git a/src/raw/alloc.rs b/src/raw/alloc.rs index 15299e7b0..a9f88c259 100644 --- a/src/raw/alloc.rs +++ b/src/raw/alloc.rs @@ -1,4 +1,14 @@ -pub(crate) use self::inner::{do_alloc, Allocator, Global}; +pub(crate) use self::inner::{Allocator, Global}; +use crate::alloc::alloc::Layout; +use core::ptr::NonNull; + +#[allow(clippy::map_err_ignore)] +pub(crate) fn do_alloc(alloc: &A, layout: Layout) -> Result, ()> { + match alloc.allocate(layout) { + Ok(ptr) => Ok(ptr), + Err(_) => Err(()), + } +} // Nightly-case. // Use unstable `allocator_api` feature. @@ -6,17 +16,7 @@ pub(crate) use self::inner::{do_alloc, Allocator, Global}; // This is used when building for `std`. #[cfg(feature = "nightly")] mod inner { - use crate::alloc::alloc::Layout; pub use crate::alloc::alloc::{Allocator, Global}; - use core::ptr::NonNull; - - #[allow(clippy::map_err_ignore)] - pub(crate) fn do_alloc(alloc: &A, layout: Layout) -> Result, ()> { - match alloc.allocate(layout) { - Ok(ptr) => Ok(ptr.as_non_null_ptr()), - Err(_) => Err(()), - } - } } // Basic non-nightly case. @@ -27,17 +27,7 @@ mod inner { // `core::alloc::Allocator`. #[cfg(all(not(feature = "nightly"), feature = "allocator-api2"))] mod inner { - use crate::alloc::alloc::Layout; pub use allocator_api2::alloc::{Allocator, Global}; - use core::ptr::NonNull; - - #[allow(clippy::map_err_ignore)] - pub(crate) fn do_alloc(alloc: &A, layout: Layout) -> Result, ()> { - match alloc.allocate(layout) { - Ok(ptr) => Ok(ptr.cast()), - Err(_) => Err(()), - } - } } // No-defaults case. 
@@ -55,7 +45,7 @@ mod inner { #[allow(clippy::missing_safety_doc)] // not exposed outside of this crate pub unsafe trait Allocator { - fn allocate(&self, layout: Layout) -> Result, ()>; + fn allocate(&self, layout: Layout) -> Result, ()>; unsafe fn deallocate(&self, ptr: NonNull, layout: Layout); } @@ -64,8 +54,11 @@ mod inner { unsafe impl Allocator for Global { #[inline] - fn allocate(&self, layout: Layout) -> Result, ()> { - unsafe { NonNull::new(alloc(layout)).ok_or(()) } + fn allocate(&self, layout: Layout) -> Result, ()> { + match unsafe { NonNull::new(alloc(layout)) } { + Some(ptr) => Ok(NonNull::slice_from_raw_parts(ptr, layout.size())), + None => Err(()), + } } #[inline] unsafe fn deallocate(&self, ptr: NonNull, layout: Layout) { @@ -79,8 +72,4 @@ mod inner { Global } } - - pub(crate) fn do_alloc(alloc: &A, layout: Layout) -> Result, ()> { - alloc.allocate(layout) - } } diff --git a/src/raw/mod.rs b/src/raw/mod.rs index 5cfd4a3dc..edad217da 100644 --- a/src/raw/mod.rs +++ b/src/raw/mod.rs @@ -1736,6 +1736,40 @@ impl RawTableInner { } } +/// Round `z` down to a power of 2. If it's already a power of 2, it's unchanged. +/// Zero is not a valid input: `shift - leading_zeros` underflows for it. +fn prev_pow2(z: usize) -> usize { + let shift = mem::size_of::() * 8 - 1; + 1 << (shift - (z.leading_zeros() as usize)) +} + +fn maximum_buckets_in( + allocation_size: usize, + table_layout: TableLayout, + group_width: usize, +) -> usize { + // Given an equation like: + // z >= x * y + x + g + // x can be maximized by doing: + // x = (z - g) / (y + 1) + // If you squint: + // x is the number of buckets + // y is the table_layout.size + // z is the size of the allocation + // g is the group width + // But this is ignoring the padding needed for ctrl_align. 
+ // If we remember these restrictions: + // x is always a power of 2 + // Layout size for T must always be a multiple of T's alignment + // Then the alignment can be ignored if we add the constraint: + // x * y >= table_layout.ctrl_align + // This is taken care of by `capacity_to_buckets`. + let numerator = allocation_size - group_width; + let denominator = table_layout.size + 1; // todo: ZSTs? + let quotient = numerator / denominator; + prev_pow2(quotient) +} + impl RawTableInner { /// Allocates a new [`RawTableInner`] with the given number of buckets. /// The control bytes and buckets are left uninitialized. @@ -1753,7 +1787,7 @@ impl RawTableInner { unsafe fn new_uninitialized( alloc: &A, table_layout: TableLayout, - buckets: usize, + mut buckets: usize, fallibility: Fallibility, ) -> Result where @@ -1762,13 +1796,29 @@ impl RawTableInner { debug_assert!(buckets.is_power_of_two()); // Avoid `Option::ok_or_else` because it bloats LLVM IR. - let (layout, ctrl_offset) = match table_layout.calculate_layout_for(buckets) { + let (layout, mut ctrl_offset) = match table_layout.calculate_layout_for(buckets) { Some(lco) => lco, None => return Err(fallibility.capacity_overflow()), }; let ptr: NonNull = match do_alloc(alloc, layout) { - Ok(block) => block.cast(), + Ok(block) => { + // Utilize over-sized allocations: size the table by the block actually returned. + let x = maximum_buckets_in(block.len(), table_layout, Group::WIDTH); + debug_assert!(x >= buckets); + // Calculate the new ctrl_offset. 
+ let (_oversized_layout, oversized_ctrl_offset) = + match table_layout.calculate_layout_for(x) { + Some(lco) => lco, + None => unsafe { hint::unreachable_unchecked() }, + }; + debug_assert!(_oversized_layout.size() <= block.len()); + debug_assert!(oversized_ctrl_offset >= ctrl_offset); + ctrl_offset = oversized_ctrl_offset; + buckets = x; + + block.cast() + } Err(_) => return Err(fallibility.alloc_err(layout)), }; @@ -4586,6 +4636,23 @@ impl RawExtractIf<'_, T, A> { mod test_map { use super::*; + #[test] + fn test_prev_pow2() { + // Skip 0, not defined for that input. + let mut pow2: usize = 1; + while (pow2 << 1) > 0 { + let next_pow2 = pow2 << 1; + assert_eq!(pow2, prev_pow2(pow2)); + // Skip next_pow2 == 2: then pow2 + 1 is 2, itself a power of 2, so + // prev_pow2 returns 2 rather than 1. + if next_pow2 > 2 { + assert_eq!(pow2, prev_pow2(pow2 + 1)); + assert_eq!(pow2, prev_pow2(next_pow2 - 1)); + } + pow2 = next_pow2; + } + } + #[test] fn test_minimum_capacity_for_small_types() { #[track_caller]