Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rewrite local vocab entries and blank nodes for DeltaTriples #1580

Merged
merged 9 commits into from
Oct 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions src/engine/LocalVocab.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -88,3 +88,12 @@ BlankNodeIndex LocalVocab::getBlankNodeIndex(
}
return BlankNodeIndex::make(localBlankNodeManager_->getId());
}

// _____________________________________________________________________________
bool LocalVocab::isBlankNodeIndexContained(
    BlankNodeIndex blankNodeIndex) const {
  // If there is no local blank node manager, there is nothing that could
  // contain the index. Otherwise delegate the check to the manager. The
  // short-circuit `&&` makes sure that the manager is only accessed if it
  // exists.
  return localBlankNodeManager_ &&
         localBlankNodeManager_->containsBlankNodeIndex(blankNodeIndex.get());
}
4 changes: 4 additions & 0 deletions src/engine/LocalVocab.h
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,10 @@ class LocalVocab {
[[nodiscard]] BlankNodeIndex getBlankNodeIndex(
ad_utility::BlankNodeManager* blankNodeManager);

// Return true iff the given `blankNodeIndex` is one that was previously
// generated by the blank node manager of this local vocab.
bool isBlankNodeIndexContained(BlankNodeIndex blankNodeIndex) const;

private:
// Common implementation for the two variants of
// `getIndexAndAddIfNotContainedImpl` above.
Expand Down
56 changes: 56 additions & 0 deletions src/index/DeltaTriples.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -85,11 +85,67 @@ void DeltaTriples::deleteTriples(CancellationHandle cancellationHandle,
triplesDeleted_, triplesInserted_);
}

// ____________________________________________________________________________
void DeltaTriples::rewriteLocalVocabEntriesAndBlankNodes(Triples& triples) {
  // Maps each original blank node `Id` (as found in `triples`) to the blank
  // node `Id` that replaces it and that is managed by the `localVocab_` of
  // this class. This ensures that multiple occurrences of the same original
  // blank node are all rewritten to the same new blank node.
  ad_utility::HashMap<Id, Id> replacementFor;

  // Blank node indices strictly below this bound are treated as blank nodes
  // from the original index ("global" blank nodes).
  const auto firstLocalBlankNode = index_.getBlankNodeManager()->minIndex_;

  // Return the replacement for the blank node `original`. If there is none
  // yet, create a new blank node via `localVocab_` and remember it.
  auto replaceBlankNode = [this, &replacementFor](Id original) {
    AD_CORRECTNESS_CHECK(original.getDatatype() == Datatype::BlankNodeIndex);
    // A single `try_emplace` covers both cases (already mapped or not). The
    // placeholder value is overwritten immediately if the entry is new.
    auto [entry, isNew] =
        replacementFor.try_emplace(original, Id::makeUndefined());
    if (!isNew) {
      return entry->second;
    }
    entry->second = Id::makeFromBlankNodeIndex(
        localVocab_.getBlankNodeIndex(index_.getBlankNodeManager()));
    return entry->second;
  };

  // Rewrite a single `id` in place: local vocab entries are re-registered in
  // `localVocab_`, blank nodes that are neither global nor already managed by
  // `localVocab_` are replaced, and all other datatypes are left untouched.
  auto rewriteId = [&](Id& id) {
    const auto datatype = id.getDatatype();
    if (datatype == Datatype::LocalVocabIndex) {
      id = Id::makeFromLocalVocabIndex(
          localVocab_.getIndexAndAddIfNotContained(*id.getLocalVocabIndex()));
      return;
    }
    if (datatype != Datatype::BlankNodeIndex) {
      return;
    }
    auto blankNodeIndex = id.getBlankNodeIndex();
    const bool needsNoRewrite =
        blankNodeIndex.get() < firstLocalBlankNode ||
        localVocab_.isBlankNodeIndexContained(blankNodeIndex);
    if (!needsNoRewrite) {
      id = replaceBlankNode(id);
    }
  };

  // Apply the rewriting to all `Id`s of all `triples` (first the `ids_`, then
  // the `payload_` of each triple).
  for (IdTriple<0>& triple : triples) {
    for (auto& id : triple.ids_) {
      rewriteId(id);
    }
    for (auto& id : triple.payload_) {
      rewriteId(id);
    }
  }
}

// ____________________________________________________________________________
void DeltaTriples::modifyTriplesImpl(CancellationHandle cancellationHandle,
Triples triples, bool shouldExist,
TriplesToHandlesMap& targetMap,
TriplesToHandlesMap& inverseMap) {
rewriteLocalVocabEntriesAndBlankNodes(triples);
std::ranges::sort(triples);
auto [first, last] = std::ranges::unique(triples);
triples.erase(first, last);
Expand Down
11 changes: 11 additions & 0 deletions src/index/DeltaTriples.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
class DeltaTriples {
FRIEND_TEST(DeltaTriplesTest, insertTriplesAndDeleteTriples);
FRIEND_TEST(DeltaTriplesTest, clear);
FRIEND_TEST(DeltaTriplesTest, addTriplesToLocalVocab);

private:
// The index to which these triples are added.
Expand Down Expand Up @@ -123,6 +124,16 @@ class DeltaTriples {
bool shouldExist, TriplesToHandlesMap& targetMap,
TriplesToHandlesMap& inverseMap);

// Rewrite each triple in `triples` such that all local vocab entries and all
// local blank nodes are managed by the `localVocab_` of this class.
//
// NOTE: This is important for two reasons: (1) It avoids duplicates for
// successive insertions referring to the same local vocab entries; (2) It
// avoids storing local vocab entries or blank nodes that were created only
// temporarily when evaluating the WHERE clause of an update query.
void rewriteLocalVocabEntriesAndBlankNodes(Triples& triples);
FRIEND_TEST(DeltaTriplesTest, rewriteLocalVocabEntriesAndBlankNodes);

// Erase `LocatedTriple` object from each `LocatedTriplesPerBlock` list. The
// argument are iterators for each list, as returned by the method
// `locateTripleInAllPermutations` above.
Expand Down
12 changes: 10 additions & 2 deletions src/util/BlankNodeManager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ BlankNodeManager::Block BlankNodeManager::allocateBlock() {

// _____________________________________________________________________________
BlankNodeManager::Block::Block(uint64_t blockIndex, uint64_t startIndex)
: blockIdx_(blockIndex), nextIdx_(startIndex) {}
: blockIdx_(blockIndex), startIdx_(startIndex), nextIdx_(startIndex) {}

// _____________________________________________________________________________
BlankNodeManager::LocalBlankNodeManager::LocalBlankNodeManager(
Expand All @@ -44,7 +44,7 @@ BlankNodeManager::LocalBlankNodeManager::LocalBlankNodeManager(
// _____________________________________________________________________________
BlankNodeManager::LocalBlankNodeManager::~LocalBlankNodeManager() {
auto ptr = blankNodeManager_->usedBlocksSet_.wlock();
for (auto block : blocks_) {
for (const auto& block : blocks_) {
AD_CONTRACT_CHECK(ptr->contains(block.blockIdx_));
ptr->erase(block.blockIdx_);
}
Expand All @@ -59,4 +59,12 @@ uint64_t BlankNodeManager::LocalBlankNodeManager::getId() {
return blocks_.back().nextIdx_++;
}

// _____________________________________________________________________________
bool BlankNodeManager::LocalBlankNodeManager::containsBlankNodeIndex(
uint64_t index) const {
return std::ranges::any_of(blocks_, [index](const Block& block) {
return index >= block.startIdx_ && index < block.nextIdx_;
});
}

} // namespace ad_utility
17 changes: 12 additions & 5 deletions src/util/BlankNodeManager.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,31 +60,38 @@ class BlankNodeManager {
~Block() = default;
// The index of this block.
const uint64_t blockIdx_;

// The first index within this block.
const uint64_t startIdx_;
// The next free index within this block.
uint64_t nextIdx_;
};

// Manages the BlankNodes used within a LocalVocab.
// Manages the blank nodes for a single local vocab.
class LocalBlankNodeManager {
public:
explicit LocalBlankNodeManager(BlankNodeManager* blankNodeManager);
~LocalBlankNodeManager();

// No copy, as the managed blocks shall not be duplicated.
// No copy, as the managed blocks should not be duplicated.
LocalBlankNodeManager(const LocalBlankNodeManager&) = delete;
LocalBlankNodeManager& operator=(const LocalBlankNodeManager&) = delete;

// Move is allowed.
LocalBlankNodeManager(LocalBlankNodeManager&&) = default;
LocalBlankNodeManager& operator=(LocalBlankNodeManager&&) = default;

// Get a new id.
// Get a new blank node index.
[[nodiscard]] uint64_t getId();

// Return true iff the `index` was returned by a previous call to `getId()`.
bool containsBlankNodeIndex(uint64_t index) const;

private:
// Reserved blocks.
std::vector<BlankNodeManager::Block> blocks_;

// Reference of the BlankNodeManager, used to free the reserved blocks.
// Reference to the BlankNodeManager, used to free the reserved blocks.
BlankNodeManager* blankNodeManager_;

// The first index after the current Block.
Expand All @@ -93,7 +100,7 @@ class BlankNodeManager {
FRIEND_TEST(BlankNodeManager, LocalBlankNodeManagerGetID);
};

// Allocate and retrieve a block of free ids.
// Allocate and retrieve a block of new blank node indexes.
[[nodiscard]] Block allocateBlock();

FRIEND_TEST(BlankNodeManager, blockAllocationAndFree);
Expand Down
4 changes: 4 additions & 0 deletions test/BlankNodeManagerTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,10 +46,14 @@ TEST(BlankNodeManager, LocalBlankNodeManagerGetID) {
// no blocks are allocated yet
uint64_t id = l.getId();
EXPECT_EQ(l.blocks_.size(), 1);
EXPECT_TRUE(l.containsBlankNodeIndex(id));
EXPECT_FALSE(l.containsBlankNodeIndex(id + 1));
EXPECT_FALSE(l.containsBlankNodeIndex(id - 1));

// or the ids of the last block are all used
l.blocks_.back().nextIdx_ = id + BlankNodeManager::blockSize_;
id = l.getId();
EXPECT_TRUE(l.containsBlankNodeIndex(id));
EXPECT_EQ(l.blocks_.size(), 2);
}

Expand Down
79 changes: 78 additions & 1 deletion test/DeltaTriplesTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,8 @@ class DeltaTriplesTest : public ::testing::Test {
"<b> <prev> <a> . "
"<c> <prev> <b> . "
"<B> <prev> <A> . "
"<C> <prev> <B>";
"<C> <prev> <B> . "
"<anon> <x> _:blubb";

// Query execution context with index for testing, see `IndexTestHelpers.h`.
QueryExecutionContext* testQec = ad_utility::testing::getQec(testTurtle);
Expand Down Expand Up @@ -275,3 +276,79 @@ TEST_F(DeltaTriplesTest, insertTriplesAndDeleteTriples) {
{"<A> <B> <D>", "<A> <next> <B>", "<B> <next> <C>",
"<C> <prev> <B>", "<B> <prev> <A>"}));
}

// Test `DeltaTriples::rewriteLocalVocabEntriesAndBlankNodes`: local vocab
// entries and blank nodes that are not yet managed by the `DeltaTriples` object
// are rewritten, everything else is left unchanged, and rewriting a second
// time is a no-op.
TEST_F(DeltaTriplesTest, rewriteLocalVocabEntriesAndBlankNodes) {
// Create a triple with a new local vocab entry and a new blank node. Use the
// same new blank node twice (as object ID and graph ID, not important) so
// that we can test that both occurrences are rewritten to the same new blank
// node.
DeltaTriples deltaTriples(testQec->getIndex());
auto& vocab = testQec->getIndex().getVocab();
LocalVocab localVocabOutside;
auto triples =
makeIdTriples(vocab, localVocabOutside, {"<A> <notInVocab> <B>"});
AD_CORRECTNESS_CHECK(triples.size() == 1);
// Overwrite object and graph with an arbitrary blank node index that is
// expected to be neither from the original index nor managed by
// `deltaTriples` (the checks below rely on it being rewritten).
triples[0].ids_[2] =
Id::makeFromBlankNodeIndex(BlankNodeIndex::make(999'888'777));
triples[0].ids_[3] = triples[0].ids_[2];
// Snapshot of the IDs before the rewriting.
auto [s1, p1, o1, g1] = triples[0].ids_;

// Rewrite the IDs in the triple.
deltaTriples.rewriteLocalVocabEntriesAndBlankNodes(triples);
// Snapshot of the IDs after the first rewriting.
auto [s2, p2, o2, g2] = triples[0].ids_;

// The subject <A> is part of the global vocabulary, so it remains unchanged.
EXPECT_EQ(s2.getBits(), s1.getBits());

// The predicate `<notInVocab>` is part of the local vocab, so it gets
// rewritten, hence the `EXPECT_NE` on the bits of `p2` and `p1`. The
// `EXPECT_EQ(p1, p2)` tests that the strings are equal (which they should
// be).
ASSERT_TRUE(p1.getDatatype() == Datatype::LocalVocabIndex);
ASSERT_TRUE(p2.getDatatype() == Datatype::LocalVocabIndex);
EXPECT_EQ(p1, p2);
EXPECT_NE(p2.getBits(), p1.getBits());

// Test that the rewritten ID is stored (and thereby kept alive) by the
// local vocab of the `DeltaTriples` class.
auto& localVocab = deltaTriples.localVocab_;
auto idx = p2.getLocalVocabIndex();
EXPECT_EQ(idx, localVocab.getIndexOrNullopt(*idx));

// Check that the blank node is rewritten (it gets a new blank node index,
// and hence also a new ID).
ASSERT_TRUE(o1.getDatatype() == Datatype::BlankNodeIndex);
ASSERT_TRUE(o2.getDatatype() == Datatype::BlankNodeIndex);
EXPECT_NE(o2, o1);
EXPECT_NE(o2.getBits(), o1.getBits());

// Same for the graph blank node.
ASSERT_TRUE(g1.getDatatype() == Datatype::BlankNodeIndex);
ASSERT_TRUE(g2.getDatatype() == Datatype::BlankNodeIndex);
EXPECT_NE(g2, g1);
EXPECT_NE(g2.getBits(), g1.getBits());

// The object and the graph ID were the same blank node, so they should
// be rewritten to the same new ID.
EXPECT_EQ(g1.getBits(), o1.getBits());
EXPECT_EQ(g2.getBits(), o2.getBits());

// If we rewrite the already rewritten triples again, nothing should change,
// as the `LocalVocab` of the `DeltaTriples` class is aware that it already
// stores the corresponding values.
deltaTriples.rewriteLocalVocabEntriesAndBlankNodes(triples);
ASSERT_EQ(triples.size(), 1);
auto [s3, p3, o3, g3] = triples[0].ids_;
EXPECT_EQ(s3.getBits(), s2.getBits());
EXPECT_EQ(p3.getBits(), p2.getBits());
EXPECT_EQ(o3.getBits(), o2.getBits());
EXPECT_EQ(g3.getBits(), g2.getBits());

// If we use a blank node from the original index (a "global" blank node,
// here the one with index 0, which is expected to be below the `minIndex_`
// of the blank node manager), nothing gets rewritten either.
auto blank0 = Id::makeFromBlankNodeIndex(BlankNodeIndex::make(0));
triples[0].ids_[0] = blank0;
deltaTriples.rewriteLocalVocabEntriesAndBlankNodes(triples);
auto s4 = triples[0].ids_[0];
EXPECT_EQ(s4.getBits(), blank0.getBits());
}
Loading