Skip to content

Commit

Permalink
Fix the lazy block prefilter (#1657)
Browse files Browse the repository at this point in the history
This fixes a bug introduced in #1647 that caused the check `AD_CORRECTNESS_CHECK(!prefetchedValues.empty() || innerState->doneFetching_);` in `IndexScan::createPrefilteredJoinSide` to fail.
  • Loading branch information
joka921 authored Dec 4, 2024
1 parent 23f8075 commit ec806f0
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 31 deletions.
66 changes: 35 additions & 31 deletions src/engine/IndexScan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -532,39 +532,43 @@ struct IndexScan::SharedGeneratorState {
}

// Consume the next non-empty table from the generator and calculate the next
// matching blocks from the index scan.
// matching blocks from the index scan. This function guarantees that after
// it returns, both `prefetchedValues` and `pendingBlocks` contain at least
// one element.
void fetch() {
advanceInputToNextNonEmptyTable();
if (doneFetching_) {
return;
}
auto& idTable = iterator_.value()->idTable_;
auto joinColumn = idTable.getColumn(joinColumn_);
AD_EXPENSIVE_CHECK(std::ranges::is_sorted(joinColumn));
AD_CORRECTNESS_CHECK(!joinColumn.empty());
// Skip processing for undef case, it will be handled differently
if (hasUndef_) {
return;
}
AD_CORRECTNESS_CHECK(!joinColumn[0].isUndefined());
auto newBlocks =
CompressedRelationReader::getBlocksForJoin(joinColumn, metaBlocks_);
if (newBlocks.empty()) {
// The current input table matches no blocks, so we don't have to yield
// it.
return;
while (prefetchedValues_.empty() || pendingBlocks_.empty()) {
advanceInputToNextNonEmptyTable();
if (doneFetching_) {
return;
}
auto& idTable = iterator_.value()->idTable_;
auto joinColumn = idTable.getColumn(joinColumn_);
AD_EXPENSIVE_CHECK(std::ranges::is_sorted(joinColumn));
AD_CORRECTNESS_CHECK(!joinColumn.empty());
// Skip processing for undef case, it will be handled differently
if (hasUndef_) {
return;
}
AD_CORRECTNESS_CHECK(!joinColumn[0].isUndefined());
auto newBlocks =
CompressedRelationReader::getBlocksForJoin(joinColumn, metaBlocks_);
if (newBlocks.empty()) {
// The current input table matches no blocks, so we don't have to yield
// it.
continue;
}
prefetchedValues_.push_back(std::move(*iterator_.value()));
// Find first value that differs from the last one that was used to find
// matching blocks.
auto startIterator =
lastBlockIndex_.has_value()
? std::ranges::upper_bound(newBlocks, lastBlockIndex_.value(), {},
&CompressedBlockMetadata::blockIndex_)
: newBlocks.begin();
lastBlockIndex_ = newBlocks.back().blockIndex_;
std::ranges::move(startIterator, newBlocks.end(),
std::back_inserter(pendingBlocks_));
}
prefetchedValues_.push_back(std::move(*iterator_.value()));
// Find first value that differs from the last one that was used to find
// matching blocks.
auto startIterator =
lastBlockIndex_.has_value()
? std::ranges::upper_bound(newBlocks, lastBlockIndex_.value(), {},
&CompressedBlockMetadata::blockIndex_)
: newBlocks.begin();
lastBlockIndex_ = newBlocks.back().blockIndex_;
std::ranges::move(startIterator, newBlocks.end(),
std::back_inserter(pendingBlocks_));
}

// Check if there are any undefined values yielded by the original generator.
Expand Down
2 changes: 2 additions & 0 deletions test/engine/IndexScanTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -939,6 +939,8 @@ TEST_P(IndexScanWithLazyJoin,
using P = Result::IdTableVocabPair;
P p1{makeIdTableFromVector({{Id::makeFromBool(true)}}), LocalVocab{}};
co_yield p1;
P p2{makeIdTableFromVector({{Id::makeFromBool(true)}}), LocalVocab{}};
co_yield p2;
};

auto [joinSideResults, scanResults] =
Expand Down

0 comments on commit ec806f0

Please sign in to comment.