Skip to content

Commit

Permalink
Merge pull request #223 from eseiler/misc/rename
Browse files Browse the repository at this point in the history
[MISC] Rename filename_indices and refactor ctor
  • Loading branch information
smehringer authored Sep 10, 2024
2 parents f83ba1a + d22b073 commit bdeadea
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 24 deletions.
7 changes: 5 additions & 2 deletions include/hibf/build/update_user_bins.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,12 @@ namespace seqan::hibf::build
/*!\brief Updates user bins stored in HIBF.
* \ingroup hibf_build
* \param[in,out] technical_bin_to_user_bin_id The vector whose entries are overwritten (named `filename_indices`
*                                             before the rename shown here).
* \param[in] record The user bin record; its members `storage_TB_id`, `number_of_technical_bins` and `idx` are read.
* \details
* Writes `record.idx` into `record.number_of_technical_bins` consecutive entries of the vector, starting at
* position `record.storage_TB_id`.
* The vector is not resized by this function, and `std::fill_n` does not grow its target, so the range
* `[storage_TB_id, storage_TB_id + number_of_technical_bins)` has to exist already when this is called.
*/
inline void update_user_bins(std::vector<uint64_t> & filename_indices, layout::layout::user_bin const & record)
inline void update_user_bins(std::vector<uint64_t> & technical_bin_to_user_bin_id,
layout::layout::user_bin const & record)
{
std::fill_n(filename_indices.begin() + record.storage_TB_id, record.number_of_technical_bins, record.idx);
std::fill_n(technical_bin_to_user_bin_id.begin() + record.storage_TB_id,
record.number_of_technical_bins,
record.idx);
}

} // namespace seqan::hibf::build
21 changes: 10 additions & 11 deletions include/hibf/hierarchical_interleaved_bloom_filter.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,6 @@ class hierarchical_interleaved_bloom_filter
size_t number_of_user_bins{};

/*!\brief Manages membership queries for the seqan::hibf::hierarchical_interleaved_bloom_filter.
* \see seqan::hibf::hierarchical_interleaved_bloom_filter::user_bins::filename_of_user_bin
* \details
* In contrast to the seqan::hibf::interleaved_bloom_filter, the result will consist of indices of user bins.
*/
Expand Down Expand Up @@ -289,19 +288,19 @@ class hierarchical_interleaved_bloom_filter::membership_agent_type
{
sum += result[bin];

auto const current_filename_index = hibf_ptr->ibf_bin_to_user_bin_id[ibf_idx][bin];
auto const user_bin_id = hibf_ptr->ibf_bin_to_user_bin_id[ibf_idx][bin];

if (current_filename_index == bin_kind::merged) // merged bin
if (user_bin_id == bin_kind::merged) // merged bin
{
if (sum >= threshold)
membership_for_impl(values, hibf_ptr->next_ibf_id[ibf_idx][bin], threshold);
sum = 0u;
}
else if (bin + 1u == result.size() || // last bin
current_filename_index != hibf_ptr->ibf_bin_to_user_bin_id[ibf_idx][bin + 1]) // end of split bin
else if (bin + 1u == result.size() || // last bin
user_bin_id != hibf_ptr->ibf_bin_to_user_bin_id[ibf_idx][bin + 1]) // end of split bin
{
if (sum >= threshold)
result_buffer.emplace_back(current_filename_index);
result_buffer.emplace_back(user_bin_id);
sum = 0u;
}
}
Expand Down Expand Up @@ -424,19 +423,19 @@ class hierarchical_interleaved_bloom_filter::counting_agent_type
for (size_t bin{}; bin < result.size(); ++bin)
{
sum += result[bin];
auto const current_filename_index = hibf_ptr->ibf_bin_to_user_bin_id[ibf_idx][bin];
auto const user_bin_id = hibf_ptr->ibf_bin_to_user_bin_id[ibf_idx][bin];

if (current_filename_index == bin_kind::merged) // merged bin
if (user_bin_id == bin_kind::merged) // merged bin
{
if (sum >= threshold)
bulk_count_impl(values, hibf_ptr->next_ibf_id[ibf_idx][bin], threshold);
sum = 0u;
}
else if (bin + 1u == result.size() || // last bin
current_filename_index != hibf_ptr->ibf_bin_to_user_bin_id[ibf_idx][bin + 1]) // end of split bin
else if (bin + 1u == result.size() || // last bin
user_bin_id != hibf_ptr->ibf_bin_to_user_bin_id[ibf_idx][bin + 1]) // end of split bin
{
if (sum >= threshold)
result_buffer[current_filename_index] = sum;
result_buffer[user_bin_id] = sum;
sum = 0u;
}
}
Expand Down
26 changes: 15 additions & 11 deletions src/hierarchical_interleaved_bloom_filter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,16 +46,24 @@ size_t hierarchical_build(hierarchical_interleaved_bloom_filter & hibf,
{
size_t const ibf_pos{data.request_ibf_idx()};

std::vector<uint64_t> ibf_positions(current_node.number_of_technical_bins, ibf_pos);
std::vector<uint64_t> filename_indices(current_node.number_of_technical_bins, bin_kind::merged);
auto & technical_bin_to_ibf_id = hibf.next_ibf_id[ibf_pos];
assert(technical_bin_to_ibf_id.empty());
technical_bin_to_ibf_id.resize(current_node.number_of_technical_bins, ibf_pos);

auto & technical_bin_to_user_bin_id = hibf.ibf_bin_to_user_bin_id[ibf_pos];
assert(technical_bin_to_user_bin_id.empty());
technical_bin_to_user_bin_id.resize(current_node.number_of_technical_bins, bin_kind::merged);

auto & ibf = hibf.ibf_vector[ibf_pos];

robin_hood::unordered_flat_set<uint64_t> kmers{};

auto initialise_max_bin_kmers = [&]() -> size_t
{
if (current_node.max_bin_is_merged())
{
// recursively initialize favourite child first
ibf_positions[current_node.max_bin_index] =
technical_bin_to_ibf_id[current_node.max_bin_index] =
hierarchical_build(hibf,
kmers,
current_node.children[current_node.favourite_child_idx.value()],
Expand All @@ -68,15 +76,15 @@ size_t hierarchical_build(hierarchical_interleaved_bloom_filter & hibf,
// we assume that the max record is at the beginning of the list of remaining records.
auto const & record = current_node.remaining_records[0];
build::compute_kmers(kmers, data, record);
build::update_user_bins(filename_indices, record);
build::update_user_bins(technical_bin_to_user_bin_id, record);

return record.number_of_technical_bins;
}
};

// initialize lower level IBF
size_t const max_bin_tbs = initialise_max_bin_kmers();
auto && ibf = construct_ibf(parent_kmers, kmers, max_bin_tbs, current_node, data, is_root);
ibf = construct_ibf(parent_kmers, kmers, max_bin_tbs, current_node, data, is_root);
kmers.clear(); // reduce memory peak

// parse all other children (merged bins) of the current ibf
Expand Down Expand Up @@ -121,7 +129,7 @@ size_t hierarchical_build(hierarchical_interleaved_bloom_filter & hibf,
{
size_t const mutex_id{parent_bin_index / 64};
std::lock_guard<std::mutex> guard{local_ibf_mutex[mutex_id]};
ibf_positions[parent_bin_index] = ibf_pos;
technical_bin_to_ibf_id[parent_bin_index] = ibf_pos;
build::insert_into_ibf(kmers, 1, parent_bin_index, ibf, data.fill_ibf_timer);
if (!is_root)
build::update_parent_kmers(parent_kmers, kmers, data.merge_kmers_timer);
Expand Down Expand Up @@ -153,14 +161,10 @@ size_t hierarchical_build(hierarchical_interleaved_bloom_filter & hibf,
build::update_parent_kmers(parent_kmers, kmers, data.merge_kmers_timer);
}

build::update_user_bins(filename_indices, record);
build::update_user_bins(technical_bin_to_user_bin_id, record);
kmers.clear();
}

hibf.ibf_vector[ibf_pos] = std::move(ibf);
hibf.next_ibf_id[ibf_pos] = std::move(ibf_positions);
hibf.ibf_bin_to_user_bin_id[ibf_pos] = std::move(filename_indices);

return ibf_pos;
}

Expand Down

0 comments on commit bdeadea

Please sign in to comment.