diff --git a/src/engine/LocalVocab.cpp b/src/engine/LocalVocab.cpp
index afa603fba3..1dbc8e7c68 100644
--- a/src/engine/LocalVocab.cpp
+++ b/src/engine/LocalVocab.cpp
@@ -88,3 +88,12 @@ BlankNodeIndex LocalVocab::getBlankNodeIndex(
   }
   return BlankNodeIndex::make(localBlankNodeManager_->getId());
 }
+
+// _____________________________________________________________________________
+bool LocalVocab::isBlankNodeIndexContained(
+    BlankNodeIndex blankNodeIndex) const {
+  if (!localBlankNodeManager_) {
+    return false;
+  }
+  return localBlankNodeManager_->containsBlankNodeIndex(blankNodeIndex.get());
+}
diff --git a/src/engine/LocalVocab.h b/src/engine/LocalVocab.h
index 5e6c98296a..3055c400a6 100644
--- a/src/engine/LocalVocab.h
+++ b/src/engine/LocalVocab.h
@@ -109,6 +109,10 @@ class LocalVocab {
   [[nodiscard]] BlankNodeIndex getBlankNodeIndex(
       ad_utility::BlankNodeManager* blankNodeManager);

+  // Return true iff the given `blankNodeIndex` is one that was previously
+  // generated by the blank node manager of this local vocab.
+  bool isBlankNodeIndexContained(BlankNodeIndex blankNodeIndex) const;
+
  private:
   // Common implementation for the two variants of
   // `getIndexAndAddIfNotContainedImpl` above.
diff --git a/src/index/DeltaTriples.cpp b/src/index/DeltaTriples.cpp
index 79e4a4503d..a07d342ec6 100644
--- a/src/index/DeltaTriples.cpp
+++ b/src/index/DeltaTriples.cpp
@@ -85,11 +85,67 @@ void DeltaTriples::deleteTriples(CancellationHandle cancellationHandle,
                     triplesDeleted_, triplesInserted_);
 }

+// ____________________________________________________________________________
+void DeltaTriples::rewriteLocalVocabEntriesAndBlankNodes(Triples& triples) {
+  // Remember which original blank node (from the parsing of an insert
+  // operation) is mapped to which blank node managed by the `localVocab_` of
+  // this class.
+  ad_utility::HashMap<Id, Id> blankNodeMap;
+  // For the given original blank node `id`, check if it has already been
+  // mapped. If not, map it to a new blank node managed by the `localVocab_` of
+  // this class. Either way, return the (already existing or newly created)
+  // value.
+  auto getLocalBlankNode = [this, &blankNodeMap](Id id) {
+    AD_CORRECTNESS_CHECK(id.getDatatype() == Datatype::BlankNodeIndex);
+    // The following code handles both cases (already mapped or not) with a
+    // single lookup in the map. The `Id::makeUndefined()` passed to
+    // `try_emplace` is only a placeholder, which is overwritten for new keys.
+    auto [it, newElement] = blankNodeMap.try_emplace(id, Id::makeUndefined());
+    if (newElement) {
+      it->second = Id::makeFromBlankNodeIndex(
+          localVocab_.getBlankNodeIndex(index_.getBlankNodeManager()));
+    }
+    return it->second;
+  };
+
+  // Return true iff `blankNodeIndex` is a blank node index from the original
+  // index, that is, smaller than the `minIndex_` of the blank node manager.
+  auto isGlobalBlankNode = [minLocalBlankNode =
+                                index_.getBlankNodeManager()->minIndex_](
+                               BlankNodeIndex blankNodeIndex) {
+    return blankNodeIndex.get() < minLocalBlankNode;
+  };
+
+  // Helper lambda that rewrites a single local vocab or blank node `id` in
+  // place. Blank nodes that are from the original index or already managed by
+  // `localVocab_` as well as all other datatypes are left unchanged.
+  auto convertId = [this, isGlobalBlankNode, &getLocalBlankNode](Id& id) {
+    if (id.getDatatype() == Datatype::LocalVocabIndex) {
+      id = Id::makeFromLocalVocabIndex(
+          localVocab_.getIndexAndAddIfNotContained(*id.getLocalVocabIndex()));
+    } else if (id.getDatatype() == Datatype::BlankNodeIndex) {
+      auto idx = id.getBlankNodeIndex();
+      if (isGlobalBlankNode(idx) ||
+          localVocab_.isBlankNodeIndexContained(idx)) {
+        return;
+      }
+      id = getLocalBlankNode(id);
+    }
+  };
+
+  // Convert all local vocab and blank node `Id`s in all `triples`.
+  std::ranges::for_each(triples, [&convertId](IdTriple<0>& triple) {
+    std::ranges::for_each(triple.ids_, convertId);
+    std::ranges::for_each(triple.payload_, convertId);
+  });
+}
+
 // ____________________________________________________________________________
 void DeltaTriples::modifyTriplesImpl(CancellationHandle cancellationHandle,
                                      Triples triples, bool shouldExist,
                                      TriplesToHandlesMap& targetMap,
                                      TriplesToHandlesMap& inverseMap) {
+  rewriteLocalVocabEntriesAndBlankNodes(triples);
   std::ranges::sort(triples);
   auto [first, last] = std::ranges::unique(triples);
   triples.erase(first, last);
diff --git a/src/index/DeltaTriples.h b/src/index/DeltaTriples.h
index 55f1c24648..05342a845b 100644
--- a/src/index/DeltaTriples.h
+++ b/src/index/DeltaTriples.h
@@ -31,6 +31,7 @@ class DeltaTriples {

   FRIEND_TEST(DeltaTriplesTest, insertTriplesAndDeleteTriples);
   FRIEND_TEST(DeltaTriplesTest, clear);
+  FRIEND_TEST(DeltaTriplesTest, addTriplesToLocalVocab);

  private:
   // The index to which these triples are added.
@@ -123,6 +124,16 @@ class DeltaTriples {
                          bool shouldExist, TriplesToHandlesMap& targetMap,
                          TriplesToHandlesMap& inverseMap);

+  // Rewrite each triple in `triples` such that all local vocab entries and all
+  // local blank nodes are managed by the `localVocab_` of this class.
+  //
+  // NOTE: This is important for two reasons: (1) It avoids duplicates for
+  // successive insertions referring to the same local vocab entries; (2) It
+  // avoids storing local vocab entries or blank nodes that were created only
+  // temporarily when evaluating the WHERE clause of an update query.
+  void rewriteLocalVocabEntriesAndBlankNodes(Triples& triples);
+  FRIEND_TEST(DeltaTriplesTest, rewriteLocalVocabEntriesAndBlankNodes);
+
   // Erase `LocatedTriple` object from each `LocatedTriplesPerBlock` list. The
   // argument are iterators for each list, as returned by the method
   // `locateTripleInAllPermutations` above.
diff --git a/src/util/BlankNodeManager.cpp b/src/util/BlankNodeManager.cpp
index cff86c08fa..6b367118d5 100644
--- a/src/util/BlankNodeManager.cpp
+++ b/src/util/BlankNodeManager.cpp
@@ -34,7 +34,7 @@ BlankNodeManager::Block BlankNodeManager::allocateBlock() {

 // _____________________________________________________________________________
 BlankNodeManager::Block::Block(uint64_t blockIndex, uint64_t startIndex)
-    : blockIdx_(blockIndex), nextIdx_(startIndex) {}
+    : blockIdx_(blockIndex), startIdx_(startIndex), nextIdx_(startIndex) {}

 // _____________________________________________________________________________
 BlankNodeManager::LocalBlankNodeManager::LocalBlankNodeManager(
@@ -44,7 +44,7 @@ BlankNodeManager::LocalBlankNodeManager::LocalBlankNodeManager(
 // _____________________________________________________________________________
 BlankNodeManager::LocalBlankNodeManager::~LocalBlankNodeManager() {
   auto ptr = blankNodeManager_->usedBlocksSet_.wlock();
-  for (auto block : blocks_) {
+  for (const auto& block : blocks_) {
     AD_CONTRACT_CHECK(ptr->contains(block.blockIdx_));
     ptr->erase(block.blockIdx_);
   }
@@ -59,4 +59,12 @@ uint64_t BlankNodeManager::LocalBlankNodeManager::getId() {
   return blocks_.back().nextIdx_++;
 }

+// _____________________________________________________________________________
+bool BlankNodeManager::LocalBlankNodeManager::containsBlankNodeIndex(
+    uint64_t index) const {
+  return std::ranges::any_of(blocks_, [index](const Block& block) {
+    return index >= block.startIdx_ && index < block.nextIdx_;
+  });
+}
+
 }  // namespace ad_utility
diff --git a/src/util/BlankNodeManager.h b/src/util/BlankNodeManager.h
index a39616e94f..7fd5416294 100644
--- a/src/util/BlankNodeManager.h
+++ b/src/util/BlankNodeManager.h
@@ -60,31 +60,38 @@ class BlankNodeManager {
     ~Block() = default;
     // The index of this block.
     const uint64_t blockIdx_;
+
+    // The first index within this block.
+    const uint64_t startIdx_;
     // The next free index within this block.
     uint64_t nextIdx_;
   };

-  // Manages the BlankNodes used within a LocalVocab.
+  // Manages the blank nodes for a single local vocab.
   class LocalBlankNodeManager {
    public:
     explicit LocalBlankNodeManager(BlankNodeManager* blankNodeManager);
     ~LocalBlankNodeManager();

-    // No copy, as the managed blocks shall not be duplicated.
+    // No copy, as the managed blocks should not be duplicated.
     LocalBlankNodeManager(const LocalBlankNodeManager&) = delete;
     LocalBlankNodeManager& operator=(const LocalBlankNodeManager&) = delete;

+    // Move is allowed.
     LocalBlankNodeManager(LocalBlankNodeManager&&) = default;
     LocalBlankNodeManager& operator=(LocalBlankNodeManager&&) = default;

-    // Get a new id.
+    // Get a new blank node index.
     [[nodiscard]] uint64_t getId();

+    // Return true iff the `index` was returned by a previous call to `getId()`.
+    bool containsBlankNodeIndex(uint64_t index) const;
+
    private:
     // Reserved blocks.
     std::vector<Block> blocks_;

-    // Reference of the BlankNodeManager, used to free the reserved blocks.
+    // Reference to the BlankNodeManager, used to free the reserved blocks.
     BlankNodeManager* blankNodeManager_;

     // The first index after the current Block.
@@ -93,7 +100,7 @@ class BlankNodeManager {
     FRIEND_TEST(BlankNodeManager, LocalBlankNodeManagerGetID);
   };

-  // Allocate and retrieve a block of free ids.
+  // Allocate and retrieve a block of new blank node indexes.
   [[nodiscard]] Block allocateBlock();

   FRIEND_TEST(BlankNodeManager, blockAllocationAndFree);
diff --git a/test/BlankNodeManagerTest.cpp b/test/BlankNodeManagerTest.cpp
index 8a6e89ea6b..70803bd3f0 100644
--- a/test/BlankNodeManagerTest.cpp
+++ b/test/BlankNodeManagerTest.cpp
@@ -46,10 +46,14 @@ TEST(BlankNodeManager, LocalBlankNodeManagerGetID) {
   // no blocks are allocated yet
   uint64_t id = l.getId();
   EXPECT_EQ(l.blocks_.size(), 1);
+  EXPECT_TRUE(l.containsBlankNodeIndex(id));
+  EXPECT_FALSE(l.containsBlankNodeIndex(id + 1));
+  EXPECT_FALSE(l.containsBlankNodeIndex(id - 1));

   // or the ids of the last block are all used
   l.blocks_.back().nextIdx_ = id + BlankNodeManager::blockSize_;
   id = l.getId();
+  EXPECT_TRUE(l.containsBlankNodeIndex(id));
   EXPECT_EQ(l.blocks_.size(), 2);
 }

diff --git a/test/DeltaTriplesTest.cpp b/test/DeltaTriplesTest.cpp
index e1c39e8bfc..8871a43c0e 100644
--- a/test/DeltaTriplesTest.cpp
+++ b/test/DeltaTriplesTest.cpp
@@ -70,7 +70,8 @@ class DeltaTriplesTest : public ::testing::Test {
       " . "
       " . "
       " . "
-      " ";
+      " . "
+      " _:blubb";

   // Query execution context with index for testing, see `IndexTestHelpers.h`.
   QueryExecutionContext* testQec = ad_utility::testing::getQec(testTurtle);
@@ -275,3 +276,79 @@ TEST_F(DeltaTriplesTest, insertTriplesAndDeleteTriples) {
       {" ", " ", " ",
        " ", " "}));
 }
+
+// Test the rewriting of local vocab entries and blank nodes.
+TEST_F(DeltaTriplesTest, rewriteLocalVocabEntriesAndBlankNodes) {
+  // Create a triple with a new local vocab entry and a new blank node. Use the
+  // same new blank node twice (as object ID and graph ID, not important) so
+  // that we can test that both occurrences are rewritten to the same new blank
+  // node.
+  DeltaTriples deltaTriples(testQec->getIndex());
+  auto& vocab = testQec->getIndex().getVocab();
+  LocalVocab localVocabOutside;
+  auto triples =
+      makeIdTriples(vocab, localVocabOutside, {" "});
+  AD_CORRECTNESS_CHECK(triples.size() == 1);
+  triples[0].ids_[2] =
+      Id::makeFromBlankNodeIndex(BlankNodeIndex::make(999'888'777));
+  triples[0].ids_[3] = triples[0].ids_[2];
+  auto [s1, p1, o1, g1] = triples[0].ids_;
+
+  // Rewrite the IDs in the triple.
+  deltaTriples.rewriteLocalVocabEntriesAndBlankNodes(triples);
+  auto [s2, p2, o2, g2] = triples[0].ids_;
+
+  // The subject is part of the global vocabulary, so it remains unchanged.
+  EXPECT_EQ(s2.getBits(), s1.getBits());
+
+  // The predicate is a local vocab entry, so it gets rewritten, hence the
+  // `EXPECT_NE(p2.getBits(), p1.getBits())`. The `EXPECT_EQ(p1, p2)` tests
+  // that the strings are equal (which they should be).
+  ASSERT_TRUE(p1.getDatatype() == Datatype::LocalVocabIndex);
+  ASSERT_TRUE(p2.getDatatype() == Datatype::LocalVocabIndex);
+  EXPECT_EQ(p1, p2);
+  EXPECT_NE(p2.getBits(), p1.getBits());
+
+  // Test that the rewritten ID is stored (and thereby kept alive) by the
+  // local vocab of the `DeltaTriples` class.
+  auto& localVocab = deltaTriples.localVocab_;
+  auto idx = p2.getLocalVocabIndex();
+  EXPECT_EQ(idx, localVocab.getIndexOrNullopt(*idx));
+
+  // Check that the blank node is rewritten (it gets a new blank node index,
+  // and hence also a new ID).
+  ASSERT_TRUE(o1.getDatatype() == Datatype::BlankNodeIndex);
+  ASSERT_TRUE(o2.getDatatype() == Datatype::BlankNodeIndex);
+  EXPECT_NE(o2, o1);
+  EXPECT_NE(o2.getBits(), o1.getBits());
+
+  // Same for the graph blank node.
+  ASSERT_TRUE(g1.getDatatype() == Datatype::BlankNodeIndex);
+  ASSERT_TRUE(g2.getDatatype() == Datatype::BlankNodeIndex);
+  EXPECT_NE(g2, g1);
+  EXPECT_NE(g2.getBits(), g1.getBits());
+
+  // The object and the graph ID were the same blank node, so they should
+  // be rewritten to the same new ID.
+  EXPECT_EQ(g1.getBits(), o1.getBits());
+  EXPECT_EQ(g2.getBits(), o2.getBits());
+
+  // If we rewrite the already written triples again, nothing should change,
+  // as the `LocalVocab` of the `DeltaTriples` class is aware that it already
+  // stores the corresponding values.
+  deltaTriples.rewriteLocalVocabEntriesAndBlankNodes(triples);
+  ASSERT_EQ(triples.size(), 1);
+  auto [s3, p3, o3, g3] = triples[0].ids_;
+  EXPECT_EQ(s3.getBits(), s2.getBits());
+  EXPECT_EQ(p3.getBits(), p2.getBits());
+  EXPECT_EQ(o3.getBits(), o2.getBits());
+  EXPECT_EQ(g3.getBits(), g2.getBits());
+
+  // If we use a blank node from the original index (here: blank node index 0),
+  // nothing gets rewritten either.
+  auto blank0 = Id::makeFromBlankNodeIndex(BlankNodeIndex::make(0));
+  triples[0].ids_[0] = blank0;
+  deltaTriples.rewriteLocalVocabEntriesAndBlankNodes(triples);
+  auto s4 = triples[0].ids_[0];
+  EXPECT_EQ(s4.getBits(), blank0.getBits());
+}