Skip to content

Commit

Permalink
Use the augmented metadata when performing UPDATE (#1643)
Browse files Browse the repository at this point in the history
Since #1379, the located triples for each permutation have their own "augmented" block metadata (an update can change the first or last triple of a block). But so far, all index scan operations still use the original block metadata. Now they use the augmented block metadata when appropriate. For the two internal permutations, we assume that they are not affected by updates and we always use their original block metadata.
  • Loading branch information
joka921 authored Nov 28, 2024
1 parent 0ce9a39 commit 0d537b0
Show file tree
Hide file tree
Showing 6 changed files with 91 additions and 23 deletions.
9 changes: 9 additions & 0 deletions src/index/DeltaTriples.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -219,3 +219,12 @@ void DeltaTriplesManager::clear() { modify(&DeltaTriples::clear); }
SharedLocatedTriplesSnapshot DeltaTriplesManager::getCurrentSnapshot() const {
  // Hold the handle returned by `rlock()` in a named local. The snapshot is
  // copied into the return value before the handle is destroyed.
  auto lockedSnapshot = currentLocatedTriplesSnapshot_.rlock();
  return *lockedSnapshot;
}

// _____________________________________________________________________________
void DeltaTriples::setOriginalMetadata(
    Permutation::Enum permutation,
    std::vector<CompressedBlockMetadata> metadata) {
  // The enum value of the permutation is used as the index into the located
  // triples; `at` is used for the lookup, exactly as before.
  const auto permutationIndex = static_cast<size_t>(permutation);
  // `metadata` was taken by value, so it can be moved on to its final owner.
  locatedTriples().at(permutationIndex).setOriginalMetadata(
      std::move(metadata));
}
5 changes: 5 additions & 0 deletions src/index/DeltaTriples.h
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,11 @@ class DeltaTriples {
// `DeltaTriples` object.
SharedLocatedTriplesSnapshot getSnapshot() const;

// Register the original block `metadata` for the given `permutation`. The
// vector is taken by value and handed on to the located triples of that
// permutation. This has to be called before any updates are processed.
void setOriginalMetadata(Permutation::Enum permutation,
std::vector<CompressedBlockMetadata> metadata);

private:
// Find the position of the given triple in the given permutation and add it
// to each of the six `LocatedTriplesPerBlock` maps (one per permutation).
Expand Down
31 changes: 25 additions & 6 deletions src/index/IndexImpl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -882,14 +882,33 @@ void IndexImpl::createFromOnDiskIndex(const string& onDiskBase) {
range2.contain(id.getVocabIndex());
};

pso_.loadFromDisk(onDiskBase_, isInternalId, true);
pos_.loadFromDisk(onDiskBase_, isInternalId, true);
// Load the permutations and register the original metadata for the delta
// triples.
// TODO<joka921> We could delegate the setting of the metadata to the
// `Permutation` class, but we first have to deal with the delta triples for
// the additional permutations.
auto setMetadata = [this](const Permutation& p) {
// Register the block metadata of `p` with the `DeltaTriples` through the
// `modify` function of `deltaTriplesManager()`. `setOriginalMetadata` takes
// the vector by value, so it receives its own copy (assuming `blockData()`
// returns a reference -- TODO confirm).
deltaTriplesManager().modify([&p](DeltaTriples& deltaTriples) {
deltaTriples.setOriginalMetadata(p.permutation(),
p.metaData().blockData());
});
};

// Load `permutation` from disk and afterwards register its block metadata via
// `setMetadata`. `loadInternalPermutation` is passed on as the last argument
// of `Permutation::loadFromDisk` and defaults to `false`.
auto load = [this, &isInternalId, &setMetadata](
Permutation& permutation,
bool loadInternalPermutation = false) {
permutation.loadFromDisk(onDiskBase_, isInternalId,
loadInternalPermutation);
// The metadata can only be registered after the permutation has been loaded.
setMetadata(permutation);
};

load(pso_, true);
load(pos_, true);
if (loadAllPermutations_) {
ops_.loadFromDisk(onDiskBase_, isInternalId);
osp_.loadFromDisk(onDiskBase_, isInternalId);
spo_.loadFromDisk(onDiskBase_, isInternalId);
sop_.loadFromDisk(onDiskBase_, isInternalId);
load(ops_);
load(osp_);
load(spo_);
load(sop_);
} else {
AD_LOG_INFO
<< "Only the PSO and POS permutation were loaded, SPARQL queries "
Expand Down
57 changes: 41 additions & 16 deletions src/index/Permutation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ void Permutation::loadFromDisk(const std::string& onDiskBase,
internalPermutation_->loadFromDisk(
absl::StrCat(onDiskBase, QLEVER_INTERNAL_INDEX_INFIX), isInternalId_,
false);
internalPermutation_->isInternalPermutation_ = true;
}
if constexpr (MetaData::isMmapBased_) {
meta_.setup(onDiskBase + ".index" + fileSuffix_ + MMAP_FILE_SUFFIX,
Expand Down Expand Up @@ -65,8 +66,9 @@ IdTable Permutation::scan(const ScanSpecification& scanSpec,
const auto& p = getActualPermutation(scanSpec);

return p.reader().scan(
scanSpec, p.meta_.blockData(), additionalColumns, cancellationHandle,
getLocatedTriplesForPermutation(locatedTriplesSnapshot), limitOffset);
scanSpec, p.getAugmentedMetadataForPermutation(locatedTriplesSnapshot),
additionalColumns, cancellationHandle,
p.getLocatedTriplesForPermutation(locatedTriplesSnapshot), limitOffset);
}

// _____________________________________________________________________
Expand All @@ -75,8 +77,8 @@ size_t Permutation::getResultSizeOfScan(
const LocatedTriplesSnapshot& locatedTriplesSnapshot) const {
const auto& p = getActualPermutation(scanSpec);
return p.reader().getResultSizeOfScan(
scanSpec, p.meta_.blockData(),
getLocatedTriplesForPermutation(locatedTriplesSnapshot));
scanSpec, p.getAugmentedMetadataForPermutation(locatedTriplesSnapshot),
p.getLocatedTriplesForPermutation(locatedTriplesSnapshot));
}

// _____________________________________________________________________
Expand All @@ -85,8 +87,8 @@ std::pair<size_t, size_t> Permutation::getSizeEstimateForScan(
const LocatedTriplesSnapshot& locatedTriplesSnapshot) const {
const auto& p = getActualPermutation(scanSpec);
return p.reader().getSizeEstimateForScan(
scanSpec, p.meta_.blockData(),
getLocatedTriplesForPermutation(locatedTriplesSnapshot));
scanSpec, p.getAugmentedMetadataForPermutation(locatedTriplesSnapshot),
p.getLocatedTriplesForPermutation(locatedTriplesSnapshot));
}

// ____________________________________________________________________________
Expand All @@ -95,16 +97,18 @@ IdTable Permutation::getDistinctCol1IdsAndCounts(
const LocatedTriplesSnapshot& locatedTriplesSnapshot) const {
const auto& p = getActualPermutation(col0Id);
return p.reader().getDistinctCol1IdsAndCounts(
col0Id, p.meta_.blockData(), cancellationHandle,
getLocatedTriplesForPermutation(locatedTriplesSnapshot));
col0Id, p.getAugmentedMetadataForPermutation(locatedTriplesSnapshot),
cancellationHandle,
p.getLocatedTriplesForPermutation(locatedTriplesSnapshot));
}

// ____________________________________________________________________________
IdTable Permutation::getDistinctCol0IdsAndCounts(
const CancellationHandle& cancellationHandle,
const LocatedTriplesSnapshot& locatedTriplesSnapshot) const {
// Forward to `getDistinctCol0IdsAndCounts` of this permutation's own reader
// (no `getActualPermutation` lookup here), passing block metadata and the
// located triples obtained from `locatedTriplesSnapshot`.
// NOTE(review): this listing is a diff view without +/- markers. The
// `meta_.blockData(), cancellationHandle,` line appears to be the removed
// version of the two lines that follow it -- confirm against the real file.
return reader().getDistinctCol0IdsAndCounts(
meta_.blockData(), cancellationHandle,
getAugmentedMetadataForPermutation(locatedTriplesSnapshot),
cancellationHandle,
getLocatedTriplesForPermutation(locatedTriplesSnapshot));
}

Expand Down Expand Up @@ -156,8 +160,8 @@ std::optional<CompressedRelationMetadata> Permutation::getMetadata(
return p.meta_.getMetaData(col0Id);
}
return p.reader().getMetadataForSmallRelation(
p.meta_.blockData(), col0Id,
getLocatedTriplesForPermutation(locatedTriplesSnapshot));
p.getAugmentedMetadataForPermutation(locatedTriplesSnapshot), col0Id,
p.getLocatedTriplesForPermutation(locatedTriplesSnapshot));
}

// _____________________________________________________________________
Expand All @@ -167,10 +171,11 @@ std::optional<Permutation::MetadataAndBlocks> Permutation::getMetadataAndBlocks(
const auto& p = getActualPermutation(scanSpec);
CompressedRelationReader::ScanSpecAndBlocks mb{
scanSpec, CompressedRelationReader::getRelevantBlocks(
scanSpec, p.meta_.blockData())};
scanSpec, p.getAugmentedMetadataForPermutation(
locatedTriplesSnapshot))};

auto firstAndLastTriple = p.reader().getFirstAndLastTriple(
mb, getLocatedTriplesForPermutation(locatedTriplesSnapshot));
mb, p.getLocatedTriplesForPermutation(locatedTriplesSnapshot));
if (!firstAndLastTriple.has_value()) {
return std::nullopt;
}
Expand All @@ -189,14 +194,14 @@ Permutation::IdTableGenerator Permutation::lazyScan(
const auto& p = getActualPermutation(scanSpec);
if (!blocks.has_value()) {
auto blockSpan = CompressedRelationReader::getRelevantBlocks(
scanSpec, p.meta_.blockData());
scanSpec, p.getAugmentedMetadataForPermutation(locatedTriplesSnapshot));
blocks = std::vector(blockSpan.begin(), blockSpan.end());
}
ColumnIndices columns{additionalColumns.begin(), additionalColumns.end()};
return p.reader().lazyScan(
scanSpec, std::move(blocks.value()), std::move(columns),
std::move(cancellationHandle),
getLocatedTriplesForPermutation(locatedTriplesSnapshot), limitOffset);
p.getLocatedTriplesForPermutation(locatedTriplesSnapshot), limitOffset);
}

// ______________________________________________________________________
Expand Down Expand Up @@ -225,8 +230,28 @@ const Permutation& Permutation::getActualPermutation(Id id) const {
ScanSpecification{id, std::nullopt, std::nullopt});
}

// TODO<joka921> The following two functions always assume that there were no
// updates to the additional triples. This is technically true for now, because
// we never modify the additional triples with the delta triples (some
// functionality is still missing for this). We have to fix this here and in
// the `DeltaTriples` class.

// ______________________________________________________________________
const LocatedTriplesPerBlock& Permutation::getLocatedTriplesForPermutation(
const LocatedTriplesSnapshot& locatedTriplesSnapshot) const {
// NOTE(review): this listing is a diff view without +/- markers. The
// `return` directly below appears to be the removed line that the remaining
// statements replace -- confirm against the real file.
return locatedTriplesSnapshot.getLocatedTriplesForPermutation(permutation_);
// For an internal permutation, a function-local, default-constructed snapshot
// is consulted instead of `locatedTriplesSnapshot`. Because it is `static`,
// the returned reference does not dangle.
static const LocatedTriplesSnapshot emptySnapshot;
const auto& actualSnapshot =
isInternalPermutation_ ? emptySnapshot : locatedTriplesSnapshot;
return actualSnapshot.getLocatedTriplesForPermutation(permutation_);
}

// ______________________________________________________________________
const std::vector<CompressedBlockMetadata>&
Permutation::getAugmentedMetadataForPermutation(
    const LocatedTriplesSnapshot& locatedTriplesSnapshot) const {
  // Regular permutations use the block metadata that belongs to their located
  // triples in the given snapshot.
  if (!isInternalPermutation_) {
    const auto& locatedTriplesPerBlock =
        getLocatedTriplesForPermutation(locatedTriplesSnapshot);
    return locatedTriplesPerBlock.getAugmentedMetadata();
  }
  // Internal permutations ignore the snapshot and always report their
  // original block metadata.
  return meta_.blockData();
}
10 changes: 10 additions & 0 deletions src/index/Permutation.h
Original file line number Diff line number Diff line change
Expand Up @@ -161,8 +161,16 @@ class Permutation {
const LocatedTriplesPerBlock& getLocatedTriplesForPermutation(
const LocatedTriplesSnapshot& locatedTriplesSnapshot) const;

// From the given snapshot, get the augmented block metadata for this
// permutation. For an internal permutation (`isInternalPermutation_` is
// `true`), the snapshot is ignored and the original block metadata
// (`meta_.blockData()`) is returned instead.
const std::vector<CompressedBlockMetadata>&
getAugmentedMetadataForPermutation(
const LocatedTriplesSnapshot& locatedTriplesSnapshot) const;

// Return the `CompressedRelationReader` of this permutation. This calls
// `reader_.value()`, so `reader_` presumably has to hold a value already
// (e.g. after loading the permutation -- TODO confirm).
const CompressedRelationReader& reader() const { return reader_.value(); }

// Return the `Enum` value stored for this permutation (`permutation_`).
Enum permutation() const { return permutation_; }

private:
// Readable name for this permutation, e.g., `POS`.
std::string readableName_;
Expand All @@ -185,4 +193,6 @@ class Permutation {
std::unique_ptr<Permutation> internalPermutation_ = nullptr;

std::function<bool(Id)> isInternalId_;

// Set to `true` by `loadFromDisk` on the `internalPermutation_` object that
// it loads. When set, the located triples of a snapshot are ignored and
// `meta_.blockData()` is used as the block metadata for this permutation.
bool isInternalPermutation_ = false;
};
2 changes: 1 addition & 1 deletion test/util/IndexTestHelpers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ namespace {
// folded into the permutations as additional columns.
void checkConsistencyBetweenPatternPredicateAndAdditionalColumn(
const Index& index) {
DeltaTriplesManager deltaTriplesManager(index.getImpl());
const DeltaTriplesManager& deltaTriplesManager{index.deltaTriplesManager()};
auto sharedLocatedTriplesSnapshot = deltaTriplesManager.getCurrentSnapshot();
const auto& locatedTriplesSnapshot = *sharedLocatedTriplesSnapshot;
static constexpr size_t col0IdTag = 43;
Expand Down

0 comments on commit 0d537b0

Please sign in to comment.