Skip to content

Commit

Permalink
Further sequester Group/Tag code
Browse files Browse the repository at this point in the history
  • Loading branch information
clarfonthey committed Oct 15, 2024
1 parent eea9804 commit 3df96dd
Show file tree
Hide file tree
Showing 10 changed files with 168 additions and 135 deletions.
4 changes: 2 additions & 2 deletions src/raw/bitmask.rs → src/control/bitmask.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use super::imp::{
use super::group::{
BitMaskWord, NonZeroBitMaskWord, BITMASK_ITER_MASK, BITMASK_MASK, BITMASK_STRIDE,
};

Expand Down Expand Up @@ -102,7 +102,7 @@ impl IntoIterator for BitMask {

/// Iterator over the contents of a `BitMask`, returning the indices of set
/// bits.
// NOTE(review): the two `derive` lines below are the before/after sides of
// this hunk: `#[derive(Copy, Clone)]` is the line being replaced by
// `#[derive(Clone)]`.
#[derive(Copy, Clone)]
#[derive(Clone)]
pub(crate) struct BitMaskIter(pub(crate) BitMask);

impl Iterator for BitMaskIter {
Expand Down
9 changes: 3 additions & 6 deletions src/raw/generic.rs → src/control/group/generic.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
use super::bitmask::BitMask;
use super::Tag;
use super::super::{BitMask, Tag};
use core::{mem, ptr};

// Use the native word size as the group size. Using a 64-bit group size on
Expand Down Expand Up @@ -81,8 +80,7 @@ impl Group {
#[inline]
#[allow(clippy::cast_ptr_alignment)]
pub(crate) unsafe fn load_aligned(ptr: *const Tag) -> Self {
// Both assertions are debug-only checks that `ptr` is aligned to
// `align_of::<Self>()`. This hunk shows the pointer-mask form (with its
// FIXME) next to the `align_offset` form that replaces it.
// FIXME: use align_offset once it stabilizes
debug_assert_eq!(ptr as usize & (mem::align_of::<Self>() - 1), 0);
debug_assert_eq!(ptr.align_offset(mem::align_of::<Self>()), 0);
// Read the group's underlying value from `ptr` and wrap it.
Group(ptr::read(ptr.cast()))
}

Expand All @@ -91,8 +89,7 @@ impl Group {
#[inline]
#[allow(clippy::cast_ptr_alignment)]
pub(crate) unsafe fn store_aligned(self, ptr: *mut Tag) {
// Both assertions are debug-only checks that `ptr` is aligned to
// `align_of::<Self>()`. This hunk shows the pointer-mask form (with its
// FIXME) next to the `align_offset` form that replaces it.
// FIXME: use align_offset once it stabilizes
debug_assert_eq!(ptr as usize & (mem::align_of::<Self>() - 1), 0);
debug_assert_eq!(ptr.align_offset(mem::align_of::<Self>()), 0);
// Write the group's underlying value to `ptr`.
ptr::write(ptr.cast(), self.0);
}

Expand Down
35 changes: 35 additions & 0 deletions src/control/group/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
// Compile-time selection of the `Group` backend. Exactly one of the `sse2`,
// `neon` or `generic` submodules is declared, and whichever is chosen is
// aliased to `imp` so that the re-exports following this block do not need to
// know which backend is in use.
cfg_if! {
// Use the SSE2 implementation if possible: it allows us to scan 16 buckets
// at once instead of 8. We don't bother with AVX since it would require
// runtime dispatch and wouldn't gain us much anyways: the probability of
// finding a match drops off drastically after the first few buckets.
//
// I attempted an implementation on ARM using NEON instructions, but it
// turns out that most NEON instructions have multi-cycle latency, which in
// the end outweighs any gains over the generic implementation.
//
// NOTE(review): the NEON paragraph above looks stale, since the second
// branch below does select a `neon` backend on little-endian aarch64.
// Confirm and reword.
if #[cfg(all(
target_feature = "sse2",
any(target_arch = "x86", target_arch = "x86_64"),
not(miri),
))] {
mod sse2;
use sse2 as imp;
} else if #[cfg(all(
target_arch = "aarch64",
target_feature = "neon",
// NEON intrinsics are currently broken on big-endian targets.
// See https://github.com/rust-lang/stdarch/issues/1484.
target_endian = "little",
not(miri),
))] {
mod neon;
use neon as imp;
} else {
// Fallback for every other configuration. Both SIMD branches require
// `not(miri)`, so builds under Miri always land here.
mod generic;
use generic as imp;
}
}
pub(crate) use self::imp::Group;
pub(super) use self::imp::{
BitMaskWord, NonZeroBitMaskWord, BITMASK_ITER_MASK, BITMASK_MASK, BITMASK_STRIDE,
};
9 changes: 3 additions & 6 deletions src/raw/neon.rs → src/control/group/neon.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
use super::bitmask::BitMask;
use super::Tag;
use super::super::{BitMask, Tag};
use core::arch::aarch64 as neon;
use core::mem;
use core::num::NonZeroU64;
Expand Down Expand Up @@ -52,8 +51,7 @@ impl Group {
#[inline]
#[allow(clippy::cast_ptr_alignment)]
pub(crate) unsafe fn load_aligned(ptr: *const Tag) -> Self {
// Both assertions are debug-only checks that `ptr` is aligned to
// `align_of::<Self>()`. This hunk shows the pointer-mask form (with its
// FIXME) next to the `align_offset` form that replaces it.
// FIXME: use align_offset once it stabilizes
debug_assert_eq!(ptr as usize & (mem::align_of::<Self>() - 1), 0);
debug_assert_eq!(ptr.align_offset(mem::align_of::<Self>()), 0);
// Load the group's vector from `ptr` with the NEON `vld1_u8` intrinsic.
Group(neon::vld1_u8(ptr.cast()))
}

Expand All @@ -62,8 +60,7 @@ impl Group {
#[inline]
#[allow(clippy::cast_ptr_alignment)]
pub(crate) unsafe fn store_aligned(self, ptr: *mut Tag) {
// Both assertions are debug-only checks that `ptr` is aligned to
// `align_of::<Self>()`. This hunk shows the pointer-mask form (with its
// FIXME) next to the `align_offset` form that replaces it.
// FIXME: use align_offset once it stabilizes
debug_assert_eq!(ptr as usize & (mem::align_of::<Self>() - 1), 0);
debug_assert_eq!(ptr.align_offset(mem::align_of::<Self>()), 0);
// Store the group's vector to `ptr` with the NEON `vst1_u8` intrinsic.
neon::vst1_u8(ptr.cast(), self.0);
}

Expand Down
9 changes: 3 additions & 6 deletions src/raw/sse2.rs → src/control/group/sse2.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
use super::bitmask::BitMask;
use super::Tag;
use super::super::{BitMask, Tag};
use core::mem;
use core::num::NonZeroU16;

Expand Down Expand Up @@ -58,8 +57,7 @@ impl Group {
#[inline]
#[allow(clippy::cast_ptr_alignment)]
pub(crate) unsafe fn load_aligned(ptr: *const Tag) -> Self {
// Both assertions are debug-only checks that `ptr` is aligned to
// `align_of::<Self>()`. This hunk shows the pointer-mask form (with its
// FIXME) next to the `align_offset` form that replaces it.
// FIXME: use align_offset once it stabilizes
debug_assert_eq!(ptr as usize & (mem::align_of::<Self>() - 1), 0);
debug_assert_eq!(ptr.align_offset(mem::align_of::<Self>()), 0);
// Load the group's vector from `ptr` with the `_mm_load_si128` intrinsic.
Group(x86::_mm_load_si128(ptr.cast()))
}

Expand All @@ -68,8 +66,7 @@ impl Group {
#[inline]
#[allow(clippy::cast_ptr_alignment)]
pub(crate) unsafe fn store_aligned(self, ptr: *mut Tag) {
// Both assertions are debug-only checks that `ptr` is aligned to
// `align_of::<Self>()`. This hunk shows the pointer-mask form (with its
// FIXME) next to the `align_offset` form that replaces it.
// FIXME: use align_offset once it stabilizes
debug_assert_eq!(ptr as usize & (mem::align_of::<Self>() - 1), 0);
debug_assert_eq!(ptr.align_offset(mem::align_of::<Self>()), 0);
// Store the group's vector to `ptr` with the `_mm_store_si128` intrinsic.
x86::_mm_store_si128(ptr.cast(), self.0);
}

Expand Down
10 changes: 10 additions & 0 deletions src/control/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
mod bitmask;
mod group;
mod tag;

use self::bitmask::BitMask;
pub(crate) use self::{
bitmask::BitMaskIter,
group::Group,
tag::{Tag, TagSliceExt},
};
81 changes: 81 additions & 0 deletions src/control/tag.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
use core::{fmt, mem};

/// Single tag in a control group.
///
/// A transparent wrapper around one `u8`. The methods in this file read the
/// top bit as the full/special discriminator: clear means a full bucket whose
/// low 7 bits come from the hash, set means one of the special values
/// (`EMPTY` or `DELETED`).
#[derive(Copy, Clone, PartialEq, Eq)]
#[repr(transparent)]
pub(crate) struct Tag(pub(super) u8);
impl Tag {
    /// Tag marking an empty bucket: every bit is set.
    pub(crate) const EMPTY: Tag = Tag(0b1111_1111);

    /// Tag marking a deleted bucket: only the top bit is set.
    pub(crate) const DELETED: Tag = Tag(0b1000_0000);

    /// Returns `true` when the tag describes a full bucket, i.e. its top bit
    /// is clear.
    #[inline]
    pub(crate) const fn is_full(self) -> bool {
        self.0 < 0x80
    }

    /// Returns `true` when the tag is one of the special values, i.e. its top
    /// bit is set.
    #[inline]
    pub(crate) const fn is_special(self) -> bool {
        self.0 >= 0x80
    }

    /// For a special tag, reports whether it is `EMPTY` rather than `DELETED`.
    ///
    /// Only the lowest bit is inspected: it is set in `EMPTY` and clear in
    /// `DELETED`. Debug builds assert that the tag really is special.
    #[inline]
    pub(crate) const fn special_is_empty(self) -> bool {
        debug_assert!(self.is_special());
        (self.0 & 0x01) == 0x01
    }

    /// Builds the tag for a full bucket from the top 7 bits of `hash`.
    #[inline]
    #[allow(clippy::cast_possible_truncation)]
    pub(crate) const fn full(hash: u64) -> Tag {
        // Number of bytes of `hash` assumed to carry information: the smaller
        // of `size_of::<usize>()` and `size_of::<u64>()`.
        const MIN_HASH_LEN: usize = {
            let word = mem::size_of::<usize>();
            let wide = mem::size_of::<u64>();
            if word < wide {
                word
            } else {
                wide
            }
        };
        // Shift that brings the 7 highest meaningful bits down to bits 0..=6.
        const TOP7_SHIFT: usize = MIN_HASH_LEN * 8 - 7;

        // The hash is nominally a full 64-bit value, but some hash functions
        // (such as FxHash) produce a `usize`, which leaves the top 32 bits
        // zero on 32-bit platforms. Shifting by a width derived from
        // MIN_HASH_LEN picks the top bits that are actually populated.
        let top7 = (hash >> TOP7_SHIFT) & 0x7f;
        Tag(top7 as u8) // truncation: only the low 7 bits can be set here
    }
}
impl fmt::Debug for Tag {
    /// Formats special tags as the bare words `EMPTY` / `DELETED` and full
    /// tags as `full(<low 7 bits>)`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Full tags: show the 7 hash bits inside a tuple-style wrapper.
        if self.is_full() {
            return f.debug_tuple("full").field(&(self.0 & 0x7F)).finish();
        }

        // Special tags: `pad` honours any width/alignment flags on the formatter.
        let label = if self.special_is_empty() {
            "EMPTY"
        } else {
            "DELETED"
        };
        f.pad(label)
    }
}

/// Bulk-fill helpers for slices of control tags.
pub(crate) trait TagSliceExt {
    /// Fills the control with copies of `tag`.
    fn fill_tag(&mut self, tag: Tag);

    /// Clears out the control by filling it with `Tag::EMPTY`.
    fn fill_empty(&mut self) {
        self.fill_tag(Tag::EMPTY);
    }
}
impl TagSliceExt for [Tag] {
    /// Sets every tag in the slice to `tag` with a single byte-fill.
    fn fill_tag(&mut self, tag: Tag) {
        let count = self.len();
        let base = self.as_mut_ptr();
        // SAFETY: `base` and `count` both come from this exclusive borrow of
        // the slice, so the whole range is valid for writes.
        unsafe { base.write_bytes(tag.0, count) }
    }
}
2 changes: 2 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,9 @@ doc_comment::doctest!("../README.md");
#[macro_use]
mod macros;

mod control;
mod raw;
mod util;

mod external_trait_impls;
mod map;
Expand Down
Loading

0 comments on commit 3df96dd

Please sign in to comment.