From 55857e198ef7cf8dafa069d44764613c6bdeaf3d Mon Sep 17 00:00:00 2001 From: Grey Golla Date: Thu, 11 Apr 2024 21:34:55 -0700 Subject: [PATCH 1/2] Add "backlink" from value array to bucket array in FUM This allows us to directly find the bucket that points to each value in the value array, which makes erasing elements given an iterator faster (before we needed one key lookup for each element erased). Practically, this makes clearing large "chunks" of the FixedMap faster. --- .../fixed_robinhood_hashtable.hpp | 41 ++++++++++++++++++- test/fixed_robinhood_hashtable_test.cpp | 36 ++++++++++++++++ 2 files changed, 76 insertions(+), 1 deletion(-) diff --git a/include/fixed_containers/fixed_robinhood_hashtable.hpp b/include/fixed_containers/fixed_robinhood_hashtable.hpp index d7e84cc3..84b17d17 100644 --- a/include/fixed_containers/fixed_robinhood_hashtable.hpp +++ b/include/fixed_containers/fixed_robinhood_hashtable.hpp @@ -123,8 +123,17 @@ class FixedRobinhoodHashtable Hash IMPLEMENTATION_DETAIL_DO_NOT_USE_hash_{}; KeyEqual IMPLEMENTATION_DETAIL_DO_NOT_USE_key_equal_{}; + + // this stores the Key, Value pairs that we actually need to store. The LinkedList is what + // provides a stable iteration order. fixed_doubly_linked_list_detail::FixedDoublyLinkedList IMPLEMENTATION_DETAIL_DO_NOT_USE_value_storage_{}; + // this stores a "backlink" from a given Value array index to the bucket index that points at it + std::array IMPLEMENTATION_DETAIL_DO_NOT_USE_value_index_to_bucket_index_{}; + + // this is the array that is indexed by the hash of the key. Each "bucket" contains an index to + // a slot in the `value_storage_`, as well as a `dist_and_fingerprint` that is needed to + // implement robinhood hashing std::array IMPLEMENTATION_DETAIL_DO_NOT_USE_bucket_array_{}; struct OpaqueIndexType @@ -149,6 +158,16 @@ class FixedRobinhoodHashtable return IMPLEMENTATION_DETAIL_DO_NOT_USE_bucket_array_[idx]; } + [[nodiscard]] constexpr SizeType& bucket_for_value_index(SizeType value_idx) + { + return IMPLEMENTATION_DETAIL_DO_NOT_USE_value_index_to_bucket_index_[value_idx]; + } + + [[nodiscard]] constexpr const SizeType& bucket_for_value_index(SizeType value_idx) const + { + return IMPLEMENTATION_DETAIL_DO_NOT_USE_value_index_to_bucket_index_[value_idx]; + } + template [[nodiscard]] constexpr std::uint64_t hash(const Key& k) const { @@ -190,10 +209,20 @@ class FixedRobinhoodHashtable // until we hit an empty one while (0 != bucket_at(table_loc).dist_and_fingerprint_) { + // update the backlink of the value pointed to by the bucket we're about to put in + // table_loc + bucket_for_value_index(bucket.value_index_) = table_loc; + // put `bucket` in `table_loc` and then assign `bucket` to whatever used to be in + // `table_loc` bucket = std::exchange(bucket_at(table_loc), bucket); + // increment the distance of the thing we just evicted, it will be placed one slot over + // at the top of this loop bucket = bucket.plus_dist(); + // go to the next table slot table_loc = next_bucket_index(table_loc); } + // update the backlink of the value pointed to by the bucket we're about to put in table_loc + bucket_for_value_index(bucket.value_index_) = table_loc; bucket_at(table_loc) = bucket; } @@ -205,9 +234,18 @@ class FixedRobinhoodHashtable SizeType next_loc = next_bucket_index(table_loc); while (bucket_at(next_loc).dist_and_fingerprint_ >= Bucket::DIST_INC * 2) { + // overwrite the bucket at table_loc with the bucket at next_loc (accounting for the + // change in distance) bucket_at(table_loc) = bucket_at(next_loc).minus_dist(); + + // update the backlink for the shifted element + bucket_for_value_index(bucket_at(table_loc).value_index_) = table_loc; + + // shift both table_loc and next_loc forward one index table_loc = std::exchange(next_loc, next_bucket_index(next_loc)); } + // zero out the thing we're pointed at now, it was copied back one (or it is the thing we + // wanted to delete) bucket_at(table_loc) = {}; } @@ -349,7 +387,8 @@ class FixedRobinhoodHashtable SizeType cur_index = start_value_index; while (cur_index != end_value_index) { - cur_index = erase(opaque_index_of(key_at(cur_index))); + OpaqueIndexType i = {bucket_for_value_index(cur_index), 0}; + cur_index = erase(i); } return end_value_index; diff --git a/test/fixed_robinhood_hashtable_test.cpp b/test/fixed_robinhood_hashtable_test.cpp index 09434658..5b38af87 100644 --- a/test/fixed_robinhood_hashtable_test.cpp +++ b/test/fixed_robinhood_hashtable_test.cpp @@ -121,6 +121,7 @@ TEST(MapOperations, Emplace) EXPECT_EQ(map.bucket_at(3).dist(), 1); EXPECT_EQ(map.bucket_at(3).fingerprint(), 13); EXPECT_EQ(map.bucket_at(3).value_index_, 0); + EXPECT_EQ(map.bucket_for_value_index(0), 3); EXPECT_EQ(map.key_at(0), 13); EXPECT_EQ(map.value_at(0), 1); @@ -132,12 +133,14 @@ TEST(MapOperations, Emplace) EXPECT_EQ(map.bucket_at(3).dist(), 1); EXPECT_EQ(map.bucket_at(3).fingerprint(), 33); EXPECT_EQ(map.bucket_at(3).value_index_, 1); + EXPECT_EQ(map.bucket_for_value_index(1), 3); EXPECT_EQ(map.key_at(1), 33); EXPECT_EQ(map.value_at(1), 42); EXPECT_EQ(map.bucket_at(4).dist(), 2); EXPECT_EQ(map.bucket_at(4).fingerprint(), 13); EXPECT_EQ(map.bucket_at(4).value_index_, 0); + EXPECT_EQ(map.bucket_for_value_index(0), 4); EXPECT_EQ(map.key_at(0), 13); EXPECT_EQ(map.value_at(0), 1); @@ -150,6 +153,7 @@ TEST(MapOperations, Emplace) EXPECT_EQ(map.bucket_at(9).dist(), 1); EXPECT_EQ(map.bucket_at(9).fingerprint(), 9); EXPECT_EQ(map.bucket_at(9).value_index_, 2); + EXPECT_EQ(map.bucket_for_value_index(2), 9); EXPECT_EQ(map.key_at(2), 9); EXPECT_EQ(map.value_at(2), 123); @@ -162,18 +166,21 @@ TEST(MapOperations, Emplace) EXPECT_EQ(map.bucket_at(3).dist(), 1); EXPECT_EQ(map.bucket_at(3).fingerprint(), 43); EXPECT_EQ(map.bucket_at(3).value_index_, 3); + EXPECT_EQ(map.bucket_for_value_index(3), 3); EXPECT_EQ(map.key_at(3), 43); EXPECT_EQ(map.value_at(3), 999); EXPECT_EQ(map.bucket_at(4).dist(), 2); EXPECT_EQ(map.bucket_at(4).fingerprint(), 33); EXPECT_EQ(map.bucket_at(4).value_index_, 1); + EXPECT_EQ(map.bucket_for_value_index(1), 4); EXPECT_EQ(map.key_at(1), 33); EXPECT_EQ(map.value_at(1), 42); EXPECT_EQ(map.bucket_at(5).dist(), 3); EXPECT_EQ(map.bucket_at(5).fingerprint(), 13); EXPECT_EQ(map.bucket_at(5).value_index_, 0); + EXPECT_EQ(map.bucket_for_value_index(0), 5); EXPECT_EQ(map.key_at(0), 13); EXPECT_EQ(map.value_at(0), 1); @@ -185,6 +192,7 @@ TEST(MapOperations, Emplace) EXPECT_EQ(map.bucket_at(6).dist(), 1); EXPECT_EQ(map.bucket_at(6).fingerprint(), 6); EXPECT_EQ(map.bucket_at(6).value_index_, 4); + EXPECT_EQ(map.bucket_for_value_index(4), 6); EXPECT_EQ(map.key_at(4), 6); EXPECT_EQ(map.value_at(4), 1000); @@ -198,30 +206,35 @@ TEST(MapOperations, Emplace) EXPECT_EQ(map.bucket_at(3).dist(), 1); EXPECT_EQ(map.bucket_at(3).fingerprint(), 43); EXPECT_EQ(map.bucket_at(3).value_index_, 3); + EXPECT_EQ(map.bucket_for_value_index(3), 3); EXPECT_EQ(map.key_at(3), 43); EXPECT_EQ(map.value_at(3), 999); EXPECT_EQ(map.bucket_at(4).dist(), 2); EXPECT_EQ(map.bucket_at(4).fingerprint(), 33); EXPECT_EQ(map.bucket_at(4).value_index_, 1); + EXPECT_EQ(map.bucket_for_value_index(1), 4); EXPECT_EQ(map.key_at(1), 33); EXPECT_EQ(map.value_at(1), 42); EXPECT_EQ(map.bucket_at(5).dist(), 3); EXPECT_EQ(map.bucket_at(5).fingerprint(), 23); EXPECT_EQ(map.bucket_at(5).value_index_, 5); + EXPECT_EQ(map.bucket_for_value_index(5), 5); EXPECT_EQ(map.key_at(5), 23); EXPECT_EQ(map.value_at(5), 3232); EXPECT_EQ(map.bucket_at(6).dist(), 4); EXPECT_EQ(map.bucket_at(6).fingerprint(), 13); EXPECT_EQ(map.bucket_at(6).value_index_, 0); + EXPECT_EQ(map.bucket_for_value_index(0), 6); EXPECT_EQ(map.key_at(0), 13); EXPECT_EQ(map.value_at(0), 1); EXPECT_EQ(map.bucket_at(7).dist(), 2); EXPECT_EQ(map.bucket_at(7).fingerprint(), 6); EXPECT_EQ(map.bucket_at(7).value_index_, 4); + EXPECT_EQ(map.bucket_for_value_index(4), 7); EXPECT_EQ(map.key_at(4), 6); EXPECT_EQ(map.value_at(4), 1000); @@ -233,18 +246,21 @@ TEST(MapOperations, Emplace) EXPECT_EQ(map.bucket_at(6).dist(), 4); EXPECT_EQ(map.bucket_at(6).fingerprint(), 13); EXPECT_EQ(map.bucket_at(6).value_index_, 0); + EXPECT_EQ(map.bucket_for_value_index(0), 6); EXPECT_EQ(map.key_at(0), 13); EXPECT_EQ(map.value_at(0), 1); EXPECT_EQ(map.bucket_at(7).dist(), 2); EXPECT_EQ(map.bucket_at(7).fingerprint(), 66); EXPECT_EQ(map.bucket_at(7).value_index_, 6); + EXPECT_EQ(map.bucket_for_value_index(6), 7); EXPECT_EQ(map.key_at(6), 66); EXPECT_EQ(map.value_at(6), 66); EXPECT_EQ(map.bucket_at(8).dist(), 3); EXPECT_EQ(map.bucket_at(8).fingerprint(), 6); EXPECT_EQ(map.bucket_at(8).value_index_, 4); + EXPECT_EQ(map.bucket_for_value_index(4), 8); EXPECT_EQ(map.key_at(4), 6); EXPECT_EQ(map.value_at(4), 1000); @@ -257,18 +273,21 @@ TEST(MapOperations, Emplace) EXPECT_EQ(map.bucket_at(8).dist(), 3); EXPECT_EQ(map.bucket_at(8).fingerprint(), 6); EXPECT_EQ(map.bucket_at(8).value_index_, 4); + EXPECT_EQ(map.bucket_for_value_index(4), 8); EXPECT_EQ(map.key_at(4), 6); EXPECT_EQ(map.value_at(4), 1000); EXPECT_EQ(map.bucket_at(9).dist(), 2); EXPECT_EQ(map.bucket_at(9).fingerprint(), 128); EXPECT_EQ(map.bucket_at(9).value_index_, 7); + EXPECT_EQ(map.bucket_for_value_index(7), 9); EXPECT_EQ(map.key_at(7), 128); EXPECT_EQ(map.value_at(7), 256); EXPECT_EQ(map.bucket_at(0).dist(), 2); EXPECT_EQ(map.bucket_at(0).fingerprint(), 9); EXPECT_EQ(map.bucket_at(0).value_index_, 2); + EXPECT_EQ(map.bucket_for_value_index(2), 0); EXPECT_EQ(map.key_at(2), 9); EXPECT_EQ(map.value_at(2), 123); @@ -279,12 +298,14 @@ TEST(MapOperations, Emplace) EXPECT_EQ(map.bucket_at(0).dist(), 2); EXPECT_EQ(map.bucket_at(0).fingerprint(), 9); EXPECT_EQ(map.bucket_at(0).value_index_, 2); + EXPECT_EQ(map.bucket_for_value_index(2), 0); EXPECT_EQ(map.key_at(2), 9); EXPECT_EQ(map.value_at(2), 123); EXPECT_EQ(map.bucket_at(1).dist(), 2); EXPECT_EQ(map.bucket_at(1).fingerprint(), 0); EXPECT_EQ(map.bucket_at(1).value_index_, 8); + EXPECT_EQ(map.bucket_for_value_index(8), 1); EXPECT_EQ(map.key_at(8), 0); EXPECT_EQ(map.value_at(8), -1); } @@ -434,6 +455,7 @@ TEST(MapOperations, Erase) EXPECT_EQ(map.bucket_at(0).dist(), 2); EXPECT_EQ(map.bucket_at(0).fingerprint(), 9); EXPECT_EQ(map.bucket_at(0).value_index_, 2); + EXPECT_EQ(map.bucket_for_value_index(2), 0); EXPECT_EQ(map.key_at(2), 9); EXPECT_EQ(map.value_at(2), 123); @@ -457,12 +479,14 @@ TEST(MapOperations, Erase) EXPECT_EQ(map.bucket_at(8).dist(), 1); EXPECT_EQ(map.bucket_at(8).fingerprint(), 128); EXPECT_EQ(map.bucket_at(8).value_index_, 7); + EXPECT_EQ(map.bucket_for_value_index(7), 8); EXPECT_EQ(map.key_at(7), 128); EXPECT_EQ(map.value_at(7), 256); EXPECT_EQ(map.bucket_at(9).dist(), 1); EXPECT_EQ(map.bucket_at(9).fingerprint(), 9); EXPECT_EQ(map.bucket_at(9).value_index_, 2); + EXPECT_EQ(map.bucket_for_value_index(2), 9); EXPECT_EQ(map.key_at(2), 9); EXPECT_EQ(map.value_at(2), 123); @@ -494,12 +518,14 @@ TEST(MapOperations, Erase) EXPECT_EQ(map.bucket_at(5).dist(), 3); EXPECT_EQ(map.bucket_at(5).fingerprint(), 13); EXPECT_EQ(map.bucket_at(5).value_index_, 0); + EXPECT_EQ(map.bucket_for_value_index(0), 5); EXPECT_EQ(map.key_at(0), 13); EXPECT_EQ(map.value_at(0), 1); EXPECT_EQ(map.bucket_at(6).dist(), 1); EXPECT_EQ(map.bucket_at(6).fingerprint(), 66); EXPECT_EQ(map.bucket_at(6).value_index_, 6); + EXPECT_EQ(map.bucket_for_value_index(6), 6); EXPECT_EQ(map.key_at(6), 66); EXPECT_EQ(map.value_at(6), 66); @@ -509,6 +535,7 @@ TEST(MapOperations, Erase) EXPECT_EQ(map.bucket_at(8).dist(), 1); EXPECT_EQ(map.bucket_at(8).fingerprint(), 128); EXPECT_EQ(map.bucket_at(8).value_index_, 7); + EXPECT_EQ(map.bucket_for_value_index(7), 8); EXPECT_EQ(map.key_at(7), 128); EXPECT_EQ(map.value_at(7), 256); @@ -766,12 +793,14 @@ TEST(MapCornerCases, PerfectCollisions) EXPECT_EQ(map.bucket_at(3).dist(), 1); EXPECT_EQ(map.bucket_at(3).fingerprint(), 13); EXPECT_EQ(map.bucket_at(3).value_index_, 0); + EXPECT_EQ(map.bucket_for_value_index(0), 3); EXPECT_EQ(map.key_at(0), 13); EXPECT_EQ(map.value_at(0), 0); EXPECT_EQ(map.bucket_at(4).dist(), 2); EXPECT_EQ(map.bucket_at(4).fingerprint(), 13); EXPECT_EQ(map.bucket_at(4).value_index_, 1); + EXPECT_EQ(map.bucket_for_value_index(1), 4); EXPECT_EQ(map.key_at(1), 1293); EXPECT_EQ(map.value_at(1), 1); @@ -800,18 +829,21 @@ TEST(MapCornerCases, PerfectCollisions) EXPECT_EQ(map.bucket_at(3).dist(), 1); EXPECT_EQ(map.bucket_at(3).fingerprint(), 23); EXPECT_EQ(map.bucket_at(3).value_index_, 2); + EXPECT_EQ(map.bucket_for_value_index(2), 3); EXPECT_EQ(map.key_at(2), 23); EXPECT_EQ(map.value_at(2), 2); EXPECT_EQ(map.bucket_at(4).dist(), 2); EXPECT_EQ(map.bucket_at(4).fingerprint(), 13); EXPECT_EQ(map.bucket_at(4).value_index_, 0); + EXPECT_EQ(map.bucket_for_value_index(0), 4); EXPECT_EQ(map.key_at(0), 13); EXPECT_EQ(map.value_at(0), 0); EXPECT_EQ(map.bucket_at(5).dist(), 3); EXPECT_EQ(map.bucket_at(5).fingerprint(), 13); EXPECT_EQ(map.bucket_at(5).value_index_, 1); + EXPECT_EQ(map.bucket_for_value_index(1), 5); EXPECT_EQ(map.key_at(1), 1293); EXPECT_EQ(map.value_at(1), 1); @@ -836,24 +868,28 @@ TEST(MapCornerCases, PerfectCollisions) EXPECT_EQ(map.bucket_at(3).dist(), 1); EXPECT_EQ(map.bucket_at(3).fingerprint(), 23); EXPECT_EQ(map.bucket_at(3).value_index_, 2); + EXPECT_EQ(map.bucket_for_value_index(2), 3); EXPECT_EQ(map.key_at(2), 23); EXPECT_EQ(map.value_at(2), 2); EXPECT_EQ(map.bucket_at(4).dist(), 2); EXPECT_EQ(map.bucket_at(4).fingerprint(), 13); EXPECT_EQ(map.bucket_at(4).value_index_, 0); + EXPECT_EQ(map.bucket_for_value_index(0), 4); EXPECT_EQ(map.key_at(0), 13); EXPECT_EQ(map.value_at(0), 0); EXPECT_EQ(map.bucket_at(5).dist(), 3); EXPECT_EQ(map.bucket_at(5).fingerprint(), 13); EXPECT_EQ(map.bucket_at(5).value_index_, 1); + EXPECT_EQ(map.bucket_for_value_index(1), 5); EXPECT_EQ(map.key_at(1), 1293); EXPECT_EQ(map.value_at(1), 1); EXPECT_EQ(map.bucket_at(6).dist(), 3); EXPECT_EQ(map.bucket_at(6).fingerprint(), 24); EXPECT_EQ(map.bucket_at(6).value_index_, 3); + EXPECT_EQ(map.bucket_for_value_index(3), 6); EXPECT_EQ(map.key_at(3), 24); EXPECT_EQ(map.value_at(3), 3); From 219a7aaed8a531494d73279ea84c8d167b1e4df1 Mon Sep 17 00:00:00 2001 From: Grey Golla Date: Thu, 11 Apr 2024 17:36:42 -0700 Subject: [PATCH 2/2] Implement efficient clearing of the Hashtable Instead of iterating over the linked list (also requires hash lookups) and erasing one by one, just reset the backing linked list and value storage, then 0 out the bucket array. --- .../fixed_doubly_linked_list.hpp | 27 +++++++++++++++---- .../fixed_robinhood_hashtable.hpp | 9 ++++++- 2 files changed, 30 insertions(+), 6 deletions(-) diff --git a/include/fixed_containers/fixed_doubly_linked_list.hpp b/include/fixed_containers/fixed_doubly_linked_list.hpp index 11b6fa2b..3e65fbe1 100644 --- a/include/fixed_containers/fixed_doubly_linked_list.hpp +++ b/include/fixed_containers/fixed_doubly_linked_list.hpp @@ -18,6 +18,7 @@ struct LinkedListIndices template class FixedDoublyLinkedListBase { +protected: static_assert(MAXIMUM_SIZE + 1 <= std::numeric_limits::max(), "must be able to index MAXIMUM_SIZE+1 elements with IndexType"); using StorageType = FixedIndexBasedPoolStorage; @@ -52,11 +53,6 @@ class FixedDoublyLinkedListBase } [[nodiscard]] constexpr bool full() const noexcept { return storage().full(); } - constexpr void clear() noexcept - { - delete_range_and_return_next_index(front_index(), MAXIMUM_SIZE); - } - constexpr const T& at(const IndexType i) const { return storage().at(i); } constexpr T& at(const IndexType i) { return storage().at(i); } @@ -227,6 +223,11 @@ class FixedDoublyLinkedList : public FixedDoublyLinkedListBasedelete_range_and_return_next_index(this->front_index(), MAXIMUM_SIZE); + } + constexpr ~FixedDoublyLinkedList() noexcept { this->clear(); } }; @@ -240,6 +241,22 @@ class FixedDoublyLinkedList // clang-format off constexpr FixedDoublyLinkedList() noexcept : Base() { } // clang-format on + + constexpr void clear() noexcept + { + // Instead of iterating over the elements of the linked list (slow), just reset the backing + // storage + std::construct_at(&(this->IMPLEMENTATION_DETAIL_DO_NOT_USE_storage_)); + + // And reset the start/end sentinel to point at itself. + // The remaining links of the linked list will be overwritten as elements are allocated, so + // we don't have to reset the entire chain_ array + this->next_of(MAXIMUM_SIZE) = MAXIMUM_SIZE; + this->prev_of(MAXIMUM_SIZE) = MAXIMUM_SIZE; + + // Finally, set the size back to 0 + this->IMPLEMENTATION_DETAIL_DO_NOT_USE_size_ = 0; + } }; } // namespace fixed_containers::fixed_doubly_linked_list_detail::specializations diff --git a/include/fixed_containers/fixed_robinhood_hashtable.hpp b/include/fixed_containers/fixed_robinhood_hashtable.hpp index 84b17d17..86c97a1e 100644 --- a/include/fixed_containers/fixed_robinhood_hashtable.hpp +++ b/include/fixed_containers/fixed_robinhood_hashtable.hpp @@ -394,7 +394,14 @@ class FixedRobinhoodHashtable return end_value_index; } - constexpr void clear() { erase_range(begin_index(), end_index()); } + constexpr void clear() + { + // reset the backing linked list + IMPLEMENTATION_DETAIL_DO_NOT_USE_value_storage_.clear(); + + // reset the bucket array + IMPLEMENTATION_DETAIL_DO_NOT_USE_bucket_array_.fill({}); + } public: constexpr FixedRobinhoodHashtable() = default;