diff --git a/CMakeLists.txt b/CMakeLists.txt index 226bfce..2ce1327 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -23,11 +23,15 @@ set( boost_ver boost-1.86.0 ) CPMAddPackage( "gh:boostorg/static_assert#${boost_ver}" ) # Boost::core dependency CPMAddPackage( "gh:boostorg/throw_exception#${boost_ver}" ) # Boost::core dependency CPMAddPackage( "gh:boostorg/config#${boost_ver}" ) # Boost::core dependency +CPMAddPackage( "gh:boostorg/intrusive#${boost_ver}" ) # Boost::container dependency CPMAddPackage( "gh:boostorg/io#${boost_ver}" ) # Boost::utility dependency CPMAddPackage( "gh:boostorg/type_traits#${boost_ver}" ) # Boost::utility dependency CPMAddPackage( "gh:boostorg/predef#${boost_ver}" ) # Boost::winapi dependency CPMAddPackage( "gh:boostorg/assert#${boost_ver}" ) +CPMAddPackage( "gh:boostorg/container#${boost_ver}" ) # used only for comparative benchmarking CPMAddPackage( "gh:boostorg/core#${boost_ver}" ) +CPMAddPackage( "gh:boostorg/integer#${boost_ver}" ) +CPMAddPackage( "gh:boostorg/move#${boost_ver}" ) CPMAddPackage( "gh:boostorg/preprocessor#${boost_ver}" ) CPMAddPackage( "gh:boostorg/stl_interfaces#${boost_ver}" ) CPMAddPackage( "gh:boostorg/winapi#${boost_ver}" ) @@ -72,8 +76,11 @@ endif() include( vm.cmake ) target_link_libraries( psi_vm PUBLIC + Boost::container Boost::core Boost::assert + Boost::integer + Boost::move Boost::preprocessor Boost::stl_interfaces Boost::winapi diff --git a/include/psi/vm/containers/b+tree.hpp b/include/psi/vm/containers/b+tree.hpp index 7ef4a21..010227c 100644 --- a/include/psi/vm/containers/b+tree.hpp +++ b/include/psi/vm/containers/b+tree.hpp @@ -8,6 +8,8 @@ #include #include +#include +#include #include #include @@ -39,10 +41,6 @@ namespace detail BOOST_ASSERT( remaining_space >= sizeof( Header ) ); return std::pair{ reinterpret_cast
( data ), std::span{ data + sizeof( Header ), remaining_space - sizeof( Header ) } }; } - - template static constexpr bool is_simple_comparator{ false }; - template static constexpr bool is_simple_comparator>{ true }; - template static constexpr bool is_simple_comparator>{ true }; } // namespace detail //////////////////////////////////////////////////////////////////////////////// @@ -88,7 +86,7 @@ struct [[ clang::trivial_abi ]] pass_in_reg static auto constexpr pass_by_val{ can_be_passed_in_reg }; using value_type = T; - using stored_type = std::conditional_t>; + using stored_type = std::conditional_t::type>; constexpr pass_in_reg( T const & u ) noexcept : val{ u } {} @@ -115,11 +113,14 @@ struct [[ clang::trivial_abi ]] pass_rv_in_reg }; // pass_rv_in_reg template -concept KeyType = ( transparent_comparator && std::is_convertible_v ) || std::is_same_v; +concept LookupType = transparent_comparator || std::is_same_v; + +template +concept InsertableType = ( transparent_comparator && std::is_convertible_v ) || std::is_same_v; -template bool constexpr reg { false }; -template bool constexpr reg>{ true }; -template bool constexpr reg>{ true }; +template bool constexpr reg { can_be_passed_in_reg }; +template bool constexpr reg>{ true }; +template bool constexpr reg>{ true }; template concept Reg = reg; @@ -128,23 +129,24 @@ concept Reg = reg; //////////////////////////////////////////////////////////////////////////////// -// user specializations and overloads of this function are allowed -template -consteval bool use_linear_search_for_sorted_array( [[ maybe_unused ]] std::uint32_t const minimum_array_length, std::uint32_t const maximum_array_length ) noexcept +// user specializations are allowed and intended: + +template constexpr bool is_simple_comparator{ false }; +template constexpr bool is_simple_comparator>{ true }; +template constexpr bool is_simple_comparator>{ true }; + +template constexpr bool is_statically_sized { true }; +template requires requires{ 
T{}.size(); } constexpr bool is_statically_sized{ T{}.size() != 0 }; + +template +constexpr bool use_linear_search_for_sorted_array { - auto const basic_test - { - detail::is_simple_comparator && - std::is_trivially_copyable_v && - sizeof( Key ) < ( 4 * sizeof( void * ) ) && - maximum_array_length < 2048 - }; - if constexpr ( requires{ Key{}.size(); } ) - { - return basic_test && ( Key{}.size() != 0 ); - } - return basic_test; -} + ( is_simple_comparator ) && + ( std::is_trivially_copyable_v ) && + ( sizeof( Key ) < ( 4 * sizeof( void * ) ) ) && + ( maximum_array_length * sizeof( Key ) <= 4096 ) && + ( is_statically_sized ) +}; // use_linear_search_for_sorted_array //////////////////////////////////////////////////////////////////////////////// @@ -171,6 +173,10 @@ class bptree_base storage_result map_file( auto const file, flags::named_object_construction_policy const policy ) noexcept { storage_result success{ nodes_.map_file( file, policy ) }; +# ifndef NDEBUG + p_hdr_ = &hdr(); + p_nodes_ = nodes_.data(); +# endif if ( std::move( success ) && nodes_.empty() ) hdr() = {}; return success; @@ -178,7 +184,11 @@ class bptree_base storage_result map_memory( std::uint32_t initial_capacity_as_number_of_nodes = 0 ) noexcept; protected: +#if 0 // favoring CPU cache & branch prediction (linear scans) _vs_ TLB and disk access related issues, TODO make this configurable static constexpr auto node_size{ page_size }; +#else + static constexpr auto node_size{ 256 }; +#endif using depth_t = std::uint8_t; @@ -198,7 +208,8 @@ class bptree_base struct [[ nodiscard, clang::trivial_abi ]] node_header { - using size_type = std::uint16_t; + static auto constexpr minimum_header_size{ 3 * sizeof( node_slot ) + 2 * sizeof( /*minimum size_type*/ std::uint8_t ) }; + using size_type = typename boost::uint_value_t::least; // At minimum we need a single-linked/directed list in the vertical/depth // and horizontal/breadth directions (and the latter only for the leaf @@ -241,6 +252,7 @@ class 
bptree_base node_slot node {}; node_size_type value_offset{}; }; + class base_iterator; class base_random_access_iterator; @@ -263,6 +275,8 @@ class bptree_base void swap( bptree_base & other ) noexcept; base_iterator make_iter( iter_pos ) noexcept; + base_iterator make_iter( node_slot, node_size_type offset ) noexcept; + base_iterator make_iter( node_header const &, node_size_type offset ) noexcept; [[ gnu::pure ]] iter_pos begin_pos() const noexcept; [[ gnu::pure ]] iter_pos end_pos() const noexcept; @@ -301,10 +315,10 @@ class bptree_base //BOOST_ASSUME( node.num_vals >= node.min_values ); } - static constexpr auto keys ( auto & node ) noexcept { verify( node ); return std::span{ node.keys , node.num_vals }; } - static constexpr auto keys ( auto const & node ) noexcept { verify( node ); return std::span{ node.keys , node.num_vals }; } - static constexpr auto children( auto & node ) noexcept { verify( node ); if constexpr ( requires{ node.children; } ) return std::span{ node.children, node.num_vals + 1U }; else return std::array{}; } - static constexpr auto children( auto const & node ) noexcept { verify( node ); if constexpr ( requires{ node.children; } ) return std::span{ node.children, node.num_vals + 1U }; else return std::array{}; } + static constexpr auto keys ( auto & node ) noexcept { verify( node ); return std::span{ node.keys , static_cast( node.num_vals ) }; } + static constexpr auto keys ( auto const & node ) noexcept { verify( node ); return std::span{ node.keys , static_cast( node.num_vals ) }; } + static constexpr auto children( auto & node ) noexcept { verify( node ); if constexpr ( requires{ node.children; } ) return std::span{ node.children, static_cast( node.num_vals + 1U ) }; else return std::array{}; } + static constexpr auto children( auto const & node ) noexcept { verify( node ); if constexpr ( requires{ node.children; } ) return std::span{ node.children, static_cast( node.num_vals + 1U ) }; else return std::array{}; } [[ gnu::pure ]] 
static constexpr node_size_type num_vals ( auto const & node ) noexcept { return node.num_vals; } [[ gnu::pure ]] static constexpr node_size_type num_chldrn( auto const & node ) noexcept { if constexpr ( requires{ node.children; } ) { BOOST_ASSUME( node.num_vals ); return node.num_vals + 1U; } else return 0; } @@ -422,8 +436,9 @@ class bptree_base protected: node_pool nodes_; -#ifndef NDEBUG - header const * hdr_{}; +#ifndef NDEBUG // debugging helpers (undoing type erasure done by contiguous_container_storage_base) + header const * p_hdr_ {}; + node_placeholder const * p_nodes_{}; #endif }; // class bptree_base @@ -559,7 +574,7 @@ class bptree_base_wkey : public bptree_base std::uint8_t max_inner_node_count{ depth > 1 }; for ( auto d{ 3 }; d < depth; ++d ) { - max_inner_node_count += max_inner_node_count * inner_node::max_children; + max_inner_node_count += static_cast( max_inner_node_count * inner_node::max_children ); } BOOST_ASSUME( max_inner_node_count < n ); return ( n - max_inner_node_count ) * leaf_node::max_values; @@ -596,7 +611,7 @@ class bptree_base_wkey : public bptree_base Key keys [ max_values ]; node_slot children[ max_children ]; - }; // struct inner_node + }; // struct parent_node struct inner_node : parent_node { @@ -825,6 +840,8 @@ class bptree_base_wkey : public bptree_base protected: // 'other' + [[ gnu::pure, nodiscard ]] auto make_iter( auto const &... args ) noexcept { return static_cast( bptree_base::make_iter( args... ) ); } + template insert_pos_t insert( N & target_node, node_size_type const target_node_pos, key_rv_arg v, node_slot const right_child ) { @@ -911,7 +928,7 @@ class bptree_base_wkey : public bptree_base // maintaining that invariant (TODO make this an option). 
bool bulk_append_fill_incomplete_leaf( leaf_node & leaf ) noexcept { - auto const missing_keys{ static_cast( std::max( 0, signed( leaf.min_values ) - leaf.num_vals ) ) }; + auto const missing_keys{ static_cast( std::max( 0, signed( leaf.min_values - leaf.num_vals ) ) ) }; if ( missing_keys ) { auto & preceding{ left( leaf ) }; @@ -968,6 +985,9 @@ class bptree_base_wkey : public bptree_base bulk_copied_input bulk_insert_prepare( std::ranges::subrange keys ) { + if ( keys.empty() ) [[ unlikely ]] + return bulk_copied_input{}; + auto constexpr can_preallocate{ kind == std::ranges::subrange_kind::sized }; if constexpr ( can_preallocate ) reserve_additional( static_cast( keys.size() ) ); @@ -980,7 +1000,7 @@ class bptree_base_wkey : public bptree_base size_type count{ 0 }; for ( ;; ) { - leaf_node & leaf{ this->leaf( leaf_slot ) }; + auto & leaf{ this->leaf( leaf_slot ) }; BOOST_ASSUME( leaf.num_vals == 0 ); if constexpr ( can_preallocate ) { auto const size_to_copy{ static_cast( std::min( leaf.max_values, static_cast( keys.end() - p_keys ) ) ) }; @@ -995,6 +1015,7 @@ class bptree_base_wkey : public bptree_base } count += leaf.num_vals; } + BOOST_ASSUME( leaf.num_vals > 0 ); --this->hdr().free_node_count_; if ( p_keys != keys.end() ) { @@ -1027,9 +1048,12 @@ class bptree_base_wkey : public bptree_base auto * hdr{ &this->hdr() }; hdr->root_ = begin_leaf; hdr->first_leaf_ = begin_leaf; + BOOST_ASSUME( hdr->depth_ == 0 ); if ( begin_leaf == end_leaf.node ) [[ unlikely ]] // single-node-sized initial insert { hdr->last_leaf_ = end_leaf.node; + hdr->size_ = total_size; + hdr->depth_ = ( total_size != 0 ); return; } auto const & first_root_left { leaf ( begin_leaf ) }; @@ -1040,8 +1064,13 @@ class bptree_base_wkey : public bptree_base new_root( begin_leaf, first_root_left.right, key_rv_arg{ /*mrmlj*/Key{ first_root_right.keys[ 0 ] } } ); // may invalidate references hdr = &this->hdr(); BOOST_ASSUME( hdr->depth_ == 2 ); - bulk_append( &leaf( first_unconnected_node ), { 
hdr->root_, 1 } ); - BOOST_ASSUME( hdr->last_leaf_ == end_leaf.node ); + if ( first_unconnected_node ) { // first check if there are more than two nodes + bulk_append( &leaf( first_unconnected_node ), { hdr->root_, 1 } ); + BOOST_ASSUME( hdr->last_leaf_ == end_leaf.node ); + } else { + hdr->last_leaf_ = end_leaf.node; + BOOST_ASSUME( !!hdr->last_leaf_ ); + } hdr->size_ = total_size; } @@ -1337,27 +1366,36 @@ class bptree_base_wkey : public bptree_base parent.num_vals--; } + static void verify( auto const & node ) noexcept + { + BOOST_ASSERT( std::ranges::adjacent_find( keys( node ) ) == keys( node ).end() ); + bptree_base::verify( node ); + } + private: [[ gnu::const, gnu::noinline ]] static node_slot::value_type node_count_required_for_values( size_type const number_of_values ) noexcept { - if ( !number_of_values ) - return 0; + if ( number_of_values <= leaf_node::max_values ) + return ( number_of_values != 0 ); auto const leaf_count{ static_cast( divide_up( number_of_values, /*assuming an 'optimistic' reserve, i.e. 
for bulk insert*/leaf_node::max_values ) ) }; - auto total_count{ node_slot::value_type{ 0 } }; + auto total_count{ leaf_count }; auto current_level_count{ leaf_count }; auto depth{ 1 }; while ( current_level_count > 1 ) { - total_count += current_level_count; current_level_count = divide_up( current_level_count, inner_node::min_children ); // pessimistic about inner node utilization + total_count += current_level_count; ++depth; } - // theoretical (+1 since we use a 1-based depth index) - auto const minimum_height{ static_cast( 1 + std::ceil( std::log( number_of_values + 1 ) / std::log( inner_node::max_children ) ) - 1 ) }; - auto const maximum_height{ static_cast( 1 + std::log( ( number_of_values + 1 ) / 2 ) / std::log( inner_node::min_children ) ) }; + // +1 since we use a 1-based depth index (instead of a 0-based where -1 + // is used to denote an empty tree) + auto const minimum_height{ static_cast( 1 + std::ceil( std::log( leaf_count ) / std::log( inner_node::max_children ) ) ) }; + auto const maximum_height{ static_cast( 1 + std::ceil( std::log( leaf_count ) / std::log( inner_node::min_children ) ) ) }; BOOST_ASSUME( depth >= minimum_height ); BOOST_ASSUME( depth <= maximum_height ); + [[ maybe_unused ]] + auto tree_structure_overhead{ total_count - leaf_count }; return total_count; } }; // class bptree_base_wkey @@ -1464,7 +1502,7 @@ bptree_base_wkey::erase( const_iterator const iter ) noexcept auto & lf{ leaf( node ) }; if ( key_offset == 0 ) [[ unlikely ]] update_separator( lf, lf.keys[ 1 ] ); - return static_cast( make_iter( erase( lf, key_offset ) ) ); + return make_iter( erase( lf, key_offset ) ); } template @@ -1570,6 +1608,9 @@ class bp_tree using iterator = base:: iterator; using const_iterator = base::const_iterator; + using iter_pair = std::pair< iterator, iterator>; + using const_iter_pair = std::pair; + using base::empty; using base::size; using base::clear; @@ -1598,15 +1639,16 @@ class bp_tree auto random_access() noexcept { return 
std::ranges::subrange{ ra_begin(), ra_end(), size() }; } auto random_access() const noexcept { return std::ranges::subrange{ ra_begin(), ra_end(), size() }; } - [[ nodiscard ]] bool contains ( KeyType auto const & key ) const noexcept { return contains_impl ( pass_in_reg{ key } ); } - [[ nodiscard ]] iterator find ( KeyType auto const & key ) noexcept { return find_impl ( pass_in_reg{ key } ); } - [[ nodiscard ]] const_iterator find ( KeyType auto const & key ) const noexcept { return const_cast( *this ).find( key ); } - [[ nodiscard ]] iterator lower_bound( KeyType auto const & key ) noexcept { return lower_bound_impl( pass_in_reg{ key } ); } - [[ nodiscard ]] const_iterator lower_bound( KeyType auto const & key ) const noexcept { return const_cast( *this ).lower_bound( key ); } - - std::pair insert( KeyType auto const & key ) { return insert_impl( pass_in_reg{ key } ); } + [[ nodiscard ]] bool contains ( LookupType auto const & key ) const noexcept { return contains_impl ( pass_in_reg{ key } ); } + [[ nodiscard ]] iterator find ( LookupType auto const & key ) noexcept { return find_impl ( pass_in_reg{ key } ); } + [[ nodiscard ]] const_iterator find ( LookupType auto const & key ) const noexcept { return const_cast( *this ).find( key ); } + [[ nodiscard ]] iterator lower_bound( LookupType auto const & key ) noexcept { return lower_bound_impl( pass_in_reg{ key } ); } + [[ nodiscard ]] const_iterator lower_bound( LookupType auto const & key ) const noexcept { return const_cast( *this ).lower_bound( key ); } + [[ nodiscard ]] iter_pair equal_range( LookupType auto const & key ) noexcept { return equal_range_impl( pass_in_reg{ key } ); } + [[ nodiscard ]] const_iter_pair equal_range( LookupType auto const & key ) const noexcept { return const_cast( *this ).equal_range( key ); } - iterator insert( const_iterator const pos_hint, KeyType auto const & key ) { return insert_impl( pos_hint, pass_in_reg{ key } ); } + std::pair insert( InsertableType auto const & key ) { 
return insert_impl( pass_in_reg{ key } ); } + iterator insert( const_iterator const pos_hint, InsertableType auto const & key ) { return insert_impl( pos_hint, pass_in_reg{ key } ); } // bulk insert // performance note: insertion of existing values into a unique bp_tree is @@ -1661,7 +1703,7 @@ class bp_tree private: // pass-in-reg public function overloads/impls bool contains_impl( Reg auto const key ) const noexcept { return !empty() && const_cast( *this ).find_nodes_for( key ).leaf_offset.exact_find; } - [[ using gnu: noinline, pure, sysv_abi ]] + [[ using gnu: pure, sysv_abi ]] iterator find_impl( Reg auto const key ) noexcept { if ( !empty() ) [[ likely ]] @@ -1680,12 +1722,28 @@ class bp_tree if ( !empty() ) [[ likely ]] { auto const location{ find_nodes_for( key ) }; - return iterator{ this->nodes_, { slot_of( location.leaf ), location.leaf_offset.pos } }; + return base::make_iter( location.leaf, location.leaf_offset.pos ); } return this->end(); } + [[ using gnu: pure, sysv_abi ]] + iter_pair equal_range_impl( Reg auto const key ) noexcept + { + if ( !empty() ) [[ likely ]] + { + auto const location{ find_nodes_for( key ) }; + if ( location.leaf_offset.exact_find ) [[ likely ]] { + auto const begin{ base::make_iter( location.leaf, location.leaf_offset.pos ) }; + return { begin, std::next( begin ) }; + } + } + + auto const end_iter{ end() }; + return { end_iter, end_iter }; + } + std::pair insert_impl( Reg auto const v ) { if ( empty() ) @@ -1700,11 +1758,11 @@ class bp_tree BOOST_ASSUME( !locations.inner ); BOOST_ASSUME( !locations.inner_offset ); if ( locations.leaf_offset.exact_find ) [[ unlikely ]] - return { { this->nodes_, { slot_of( locations.leaf ), locations.leaf_offset.pos } }, false }; + return { base::make_iter( locations.leaf, locations.leaf_offset.pos ), false }; auto const insert_pos_next{ base::insert( locations.leaf, locations.leaf_offset.pos, Key{ v }, { /*insertion starts from leaves which do not have children*/ } ) }; ++this->hdr().size_; 
- return { std::prev( iterator{ this->nodes_, { insert_pos_next.node, insert_pos_next.next_insert_offset } } ), true }; + return { std::prev( base::make_iter( insert_pos_next.node, insert_pos_next.next_insert_offset ) ), true }; } iterator insert_impl( const_iterator const pos_hint, Reg auto const v ) @@ -1717,38 +1775,66 @@ class bp_tree auto const [hint_slot, hint_slot_offset]{ pos_hint.base().pos() }; auto const insert_pos_next{ base::insert( leaf( hint_slot ), hint_slot_offset, Key{ v }, { /*insertion starts from leaves which do not have children*/ } ) }; ++this->hdr().size_; - return std::prev( iterator{ this->nodes_, { insert_pos_next.node, insert_pos_next.next_insert_offset } } ); + return std::prev( base::make_iter( insert_pos_next.node, insert_pos_next.next_insert_offset ) ); } private: - // lower_bound find - struct find_pos // msvc pass-in-reg facepalm + // key_locations (containing find_pos) has to be returnable through registers + struct find_pos0 // msvc pass-in-reg facepalm { node_size_type pos : ( sizeof( node_size_type ) * CHAR_BIT - 1 ); node_size_type exact_find : 1; }; - [[ using gnu: pure, hot, noinline, sysv_abi ]] - find_pos find( Key const keys[], node_size_type const num_vals, Reg auto const value ) const noexcept + struct find_pos1 { - // TODO branchless binary search, Alexandrescu's ideas, https://orlp.net/blog/bitwise-binary-search ... 
- BOOST_ASSUME( num_vals > 0 ); - auto const & __restrict comp{ this->comp() }; - node_size_type pos_idx; - if constexpr ( use_linear_search_for_sorted_array( 1, leaf_node::max_values ) ) + node_size_type pos; + bool exact_find; + }; + using find_pos = std::conditional_t + < + ( sizeof( find_pos1 ) > 2 ) && + ( leaf_node::max_values <= ( std::numeric_limits::max() / 2 ) ), + find_pos0, + find_pos1 + >; + + // lower_bound find + [[ using gnu: pure, hot, noinline, sysv_abi ]] + static find_pos find( Key const keys[], node_size_type const num_vals, Reg auto const value, pass_in_reg const comparator ) noexcept + { + // TODO branchless binary search, Alexandrescu's TLC, + // https://orlp.net/blog/bitwise-binary-search + // https://algorithmica.org/en/eytzinger + // FAST: Fast Architecture Sensitive Tree Search on Modern CPUs and GPUs http://kaldewey.com/pubs/FAST__SIGMOD10.pdf + // ... + BOOST_ASSUME( num_vals > 0 ); + BOOST_ASSUME( num_vals <= leaf_node::max_values ); + Comparator const & __restrict comp( comparator ); + if constexpr ( use_linear_search_for_sorted_array ) { - auto k{ 0 }; - while ( ( k != num_vals ) && comp( keys[ k ], value ) ) - ++k; - pos_idx = static_cast( k ); + for ( node_size_type k{ 0 }; ; ) + { + if ( !comp( keys[ k ], value ) ) + { + auto const exact_find{ !comp( value, keys[ k ] ) }; + return { k, exact_find }; + } + if ( ++k == num_vals ) + { + return { k, false }; + } + } } else { - auto const pos_iter{ std::lower_bound( &keys[ 0 ], &keys[ num_vals ], value, comp ) }; - pos_idx = static_cast( std::distance( &keys[ 0 ], pos_iter ) ); + auto const pos_iter { std::lower_bound( &keys[ 0 ], &keys[ num_vals ], value, comp ) }; + auto const pos_idx { static_cast( std::distance( &keys[ 0 ], pos_iter ) ) }; + auto const exact_find{ ( pos_idx != num_vals ) && !comp( value, keys[ pos_idx ] ) }; + return { pos_idx, exact_find }; } - auto const exact_find{ ( pos_idx != num_vals ) & !comp( value, keys[ std::min( pos_idx, num_vals - 1 ) ] ) }; - return { 
pos_idx, reinterpret_cast( exact_find ) }; + std::unreachable(); } + find_pos find( Key const keys[], node_size_type const num_vals, Reg auto const value ) const noexcept { return find( keys, num_vals, value, pass_in_reg{ comp() } ); } find_pos find( auto const & node, auto const & value ) const noexcept { return find( node.keys, node.num_vals, pass_in_reg{ value } ); } [[ using gnu: pure, hot, sysv_abi ]] find_pos find_with_offset( auto const & node, node_size_type const offset, Reg auto const value ) const noexcept @@ -1768,7 +1854,7 @@ class bp_tree node_slot inner; }; - [[ using gnu: pure, hot, sysv_abi ]] + [[ using gnu: pure, hot, sysv_abi, noinline ]] key_locations find_nodes_for( Reg auto const key ) noexcept { node_slot separator_key_node; @@ -1776,7 +1862,7 @@ class bp_tree // a leaf (lone) root is implicitly handled by the loop condition: // depth_ == 1 so the loop is skipped entirely and the lone root is // never examined through the incorrectly typed reference - auto p_node{ &bptree_base::as( root() ) }; + auto p_node{ &as( root() ) }; auto const depth { this->hdr().depth_ }; BOOST_ASSUME( depth >= 1 ); for ( auto level{ 0 }; level < depth - 1; ++level ) @@ -1801,7 +1887,7 @@ class bp_tree separator_key_node }; } - key_locations find_nodes_for( Key const & key ) noexcept { return find_nodes_for( pass_in_reg{ key } ); } + key_locations find_nodes_for( Key const & key ) noexcept { return find_nodes_for( key ); } auto find_next( leaf_node const & starting_leaf, node_size_type const starting_leaf_offset, Reg auto const key ) const noexcept { @@ -1920,7 +2006,7 @@ class bp_tree { if constexpr ( requires{ comp().eq( left, right ); } ) return comp().eq( left, right ); - if constexpr ( detail::is_simple_comparator && requires{ left == right; } ) + if constexpr ( is_simple_comparator && requires{ left == right; } ) return left == right; return !comp()( left, right ) && !comp()( right, left ); } @@ -1982,9 +2068,14 @@ auto bp_tree::merge // position in the 
target node that immediately follows the // position for the inserted src_keys[ 0 ] - IOW it need not be // the position for src.keys[ next_src_offset ] - if ( next_tgt_offset != tgt.num_vals ) // necessary check because find assumes non-empty input + if + ( + ( next_tgt_offset != tgt.num_vals ) && // necessary check because find assumes non-empty input + ( next_src_offset != src.num_vals ) && // possible edge case where there was actually only one key left in the source + false // not really worth it: the caller still has to call find on/for returns from all the other branches + ) { - next_tgt_offset = find_with_offset( tgt, next_tgt_offset, pass_in_reg{ src.keys[ next_src_offset ] } ).pos; + next_tgt_offset = find_with_offset( tgt, next_tgt_offset, key_const_arg{ src.keys[ next_src_offset ] } ).pos; } return std::make_tuple( 1, 1, &tgt, next_tgt_offset ); } @@ -1996,7 +2087,7 @@ auto bp_tree::merge if ( target.right ) { auto const & right_delimiter { right( target ).keys[ 0 ] }; - auto const less_than_right_pos{ find( src_keys, copy_size, pass_in_reg{ right_delimiter } ) }; + auto const less_than_right_pos{ find( src_keys, copy_size, key_const_arg{ right_delimiter } ) }; BOOST_ASSUME( !less_than_right_pos.exact_find ); if ( less_than_right_pos.pos != copy_size ) { @@ -2051,11 +2142,16 @@ bp_tree::insert( typename base::bulk_copied_input const input ) auto const [begin_leaf, end_pos, total_size]{ input }; ra_iterator const p_new_nodes_begin{ *this, { begin_leaf, 0 }, 0 }; ra_iterator const p_new_nodes_end { *this, end_pos , total_size }; +#if 0 // slower std::sort( p_new_nodes_begin, p_new_nodes_end, comp() ); +#else + boost::movelib::pdqsort( p_new_nodes_begin, p_new_nodes_end, comp() ); +#endif if ( empty() ) { base::bulk_insert_into_empty( begin_leaf, end_pos, total_size ); + BOOST_ASSUME( this->hdr().size_ == total_size ); return total_size; } @@ -2132,7 +2228,7 @@ bp_tree::insert( typename base::bulk_copied_input const input ) // that we are using presorted 
data) rather than starting everytime from // scratch (using find_nodes_for) std::tie( tgt_leaf, tgt_leaf_next_pos ) = - find_next( *tgt_leaf, tgt_next_offset, pass_in_reg{ src_leaf->keys[ source_slot_offset ] } ); + find_next( *tgt_leaf, tgt_next_offset, key_const_arg{ src_leaf->keys[ source_slot_offset ] } ); } while ( p_new_keys != p_new_nodes_end ); BOOST_ASSUME( inserted <= total_size ); diff --git a/src/containers/b+tree.cpp b/src/containers/b+tree.cpp index dfdf7f2..8aa603a 100644 --- a/src/containers/b+tree.cpp +++ b/src/containers/b+tree.cpp @@ -5,9 +5,11 @@ namespace psi::vm //------------------------------------------------------------------------------ // https://en.wikipedia.org/wiki/B-tree +// https://en.wikipedia.org/wiki/B%2B_tree // https://opendsa-server.cs.vt.edu/ODSA/Books/CS3/html/BTree.html // http://www.cburch.com/cs/340/reading/btree/index.html // https://www.programiz.com/dsa/b-plus-tree +// https://courses.cs.washington.edu/courses/cse332/23su/lectures/9_B_Trees.pdf (version, as this impl, which has different key counts in inner vs leaf nodes) // https://www.geeksforgeeks.org/b-trees-implementation-in-c // https://github.com/jeffplaisance/BppTree // https://flatcap.github.io/linux-ntfs/ntfs/concepts/tree/index.html @@ -17,6 +19,8 @@ namespace psi::vm // https://www.researchgate.net/publication/220225482_Cache-Oblivious_Databases_Limitations_and_Opportunities // https://www.postgresql.org/docs/current/btree.html // https://abseil.io/about/design/btree +// https://www.scylladb.com/2021/11/23/the-taming-of-the-b-trees +// https://www.scattered-thoughts.net/writing/smolderingly-fast-btrees // Data Structure Visualizations https://www.cs.usfca.edu/~galles/visualization/Algorithms.html // Griffin: Fast Transactional Database Index with Hash and B+-Tree https://ieeexplore.ieee.org/abstract/document/10678674 // Restructuring the concurrent B+-tree with non-blocked search operations 
https://www.sciencedirect.com/science/article/abs/pii/S002002550200261X @@ -37,6 +41,7 @@ void bptree_base::clear() noexcept std::span bptree_base::user_header_data() noexcept { return header_data().second; } +[[ gnu::pure, gnu::hot, gnu::always_inline ]] bptree_base::header & bptree_base::hdr() noexcept { return *header_data().first; } @@ -44,6 +49,10 @@ bptree_base::storage_result bptree_base::map_memory( std::uint32_t const initial_capacity_as_number_of_nodes ) noexcept { storage_result success{ nodes_.map_memory( initial_capacity_as_number_of_nodes, value_init ) }; +#ifndef NDEBUG + p_hdr_ = &hdr(); + p_nodes_ = nodes_.data(); +#endif if ( std::move( success ) ) { hdr() = {}; @@ -61,7 +70,8 @@ void bptree_base::reserve_additional( node_slot::value_type additional_nodes ) auto const current_size{ nodes_.size() }; nodes_.grow_by( additional_nodes, value_init ); #ifndef NDEBUG - hdr_ = &hdr(); + p_hdr_ = &hdr(); + p_nodes_ = nodes_.data(); #endif assign_nodes_to_free_pool( current_size ); } @@ -73,7 +83,8 @@ void bptree_base::reserve( node_slot::value_type new_capacity_in_number_of_nodes auto const current_size{ nodes_.size() }; nodes_.grow_to( new_capacity_in_number_of_nodes, value_init ); #ifndef NDEBUG - hdr_ = &hdr(); + p_hdr_ = &hdr(); + p_nodes_ = nodes_.data(); #endif assign_nodes_to_free_pool( current_size ); } @@ -335,11 +346,14 @@ void bptree_base::swap( bptree_base & other ) noexcept using std::swap; swap( this->nodes_, other.nodes_ ); #ifndef NDEBUG - swap( this->hdr_, other.hdr_ ); + swap( this->p_hdr_ , other.p_hdr_ ); + swap( this->p_nodes_, other.p_nodes_ ); #endif } bptree_base::base_iterator bptree_base::make_iter( iter_pos const pos ) noexcept { return { nodes_, pos }; } +bptree_base::base_iterator bptree_base::make_iter( node_slot const node, node_size_type const offset ) noexcept { return make_iter({ node, offset }); } +bptree_base::base_iterator bptree_base::make_iter( node_header const & node, node_size_type const offset ) noexcept { return 
make_iter( slot_of( node ), offset ); } [[ gnu::pure ]] bptree_base::iter_pos bptree_base::begin_pos() const noexcept { return { this->first_leaf(), 0 }; } [[ gnu::pure ]] bptree_base::iter_pos bptree_base:: end_pos() const noexcept { @@ -413,7 +427,8 @@ bptree_base::new_node() BOOST_ASSUME( !new_node.left ); BOOST_ASSUME( !new_node.right ); #ifndef NDEBUG - hdr_ = &this->hdr(); + p_hdr_ = &this->hdr(); + p_nodes_ = nodes_.data(); #endif return new_node; } diff --git a/test/b+tree.cpp b/test/b+tree.cpp index af80e76..9d8c923 100644 --- a/test/b+tree.cpp +++ b/test/b+tree.cpp @@ -2,9 +2,17 @@ #include #include +#include + +#define HAVE_ABSL 0 +#if HAVE_ABSL +#include +#endif #include +#include +#include #include #include #include @@ -14,6 +22,72 @@ namespace psi::vm { //------------------------------------------------------------------------------ +#ifdef NDEBUG // bench only release builds + +namespace +{ + using timer = std::chrono::high_resolution_clock; + using duration = std::chrono::nanoseconds; + + duration time_insertion( auto & container, auto const & data ) + { + auto const start{ timer::now() }; + container.insert( data.begin(), data.end() ); + return std::chrono::duration_cast( timer::now() - start ) / data.size(); + } + duration time_lookup( auto const & container, auto const & data ) noexcept + { + auto const start{ timer::now() }; + for ( auto const x : data ) { + EXPECT_EQ( *container.find( x ), x ); + } + return std::chrono::duration_cast( timer::now() - start ) / data.size(); + } +} // anonymous namespace + +TEST( bp_tree, benchamrk ) +{ + auto const test_size{ 7654321 }; + auto const seed{ std::random_device{}() }; + std::mt19937 rng{ seed }; + + std::ranges::iota_view constexpr sorted_numbers{ 0, test_size }; + auto numbers{ std::ranges::to( sorted_numbers ) }; + std::ranges::shuffle( numbers, rng ); + + psi::vm ::bp_tree bpt; bpt.map_memory(); + boost::container::flat_set flat_set; +#if HAVE_ABSL + absl ::btree_set abpt; +#endif + + // 
bulk-insertion-into-empty + auto const flat_set_insert{ time_insertion( flat_set, sorted_numbers ) }; + auto const bpt_insert { time_insertion( bpt , sorted_numbers ) }; +#if HAVE_ABSL + auto const abpt_insert { time_insertion( abpt , sorted_numbers ) }; +#endif + + // random lookup + auto const flat_set_find{ time_lookup( flat_set, numbers ) }; + auto const bpt_find{ time_lookup( bpt , numbers ) }; +#if HAVE_ABSL + auto const abpt_find{ time_lookup( abpt , numbers ) }; +#endif + + std::println( "insert / lookup:" ); + std::println( "\t boost::container::flat_set:\t{} / {}", flat_set_insert, flat_set_find ); + std::println( "\t psi::vm::bpt:\t{} / {}", bpt_insert, bpt_find ); +#if HAVE_ABSL + std::println( "\t absl::bpt:\t{} / {}", abpt_insert, abpt_find ); +#endif + +#if 0 // CI servers are unreliable for comparative perf tests + EXPECT_LE( bpt_find, flat_set_find ); +#endif +} // bp_tree.benchamrk +#endif // release build + static auto const test_file{ "test.bpt" }; TEST( bp_tree, playground ) @@ -26,7 +100,9 @@ TEST( bp_tree, playground ) auto const test_size{ 258735 }; #endif std::ranges::iota_view constexpr sorted_numbers{ 0, test_size }; - std::mt19937 rng{ std::random_device{}() }; + auto const seed{ std::random_device{}() }; + std::println( "Seed {}", seed ); + std::mt19937 rng{ seed }; auto numbers{ std::ranges::to( sorted_numbers ) }; std::span const nums{ numbers }; // leave the largest quarter of values at the end to trigger/exercise the