From 2ebca4d2bb78752ccb32166ad4d367e887eff9a0 Mon Sep 17 00:00:00 2001 From: Joe Date: Mon, 21 Oct 2024 08:57:19 +0200 Subject: [PATCH] Add Path Search Feature to Qlever (#1335) This commit adds a feature that computes all paths between pairs of `(source, target)` on a given graph. The source, target, and graph can be configured. The syntax to trigger this search is using a SERVICE request with a special IRI. The details of this API are described in the file `docs/path_search.md`. The current implementation only supports the computation of all paths, which can be exponentially larger than the number of edges in the graph. The implemented infrastructure makes it simpler to implement additional features for this service in the future, such as: * Only return an arbitrary single path between given start and end nodes. * Only return the shortest path. * Only return the longest path (same complexity as "all paths", but lower memory requirements). * Return all edges that lie on a path. Additionally, it can be extended by efficiently supporting the LIMIT/OFFSET clauses and lazy evaluation to efficiently support dealing with very large results in the future. 
--- docs/path_search.md | 290 +++++++ e2e/scientists_queries.yaml | 28 + src/engine/CMakeLists.txt | 2 +- src/engine/CheckUsePatternTrick.cpp | 3 +- src/engine/PathSearch.cpp | 460 +++++++++++ src/engine/PathSearch.h | 282 +++++++ src/engine/QueryPlanner.cpp | 121 +++ src/engine/QueryPlanner.h | 5 + src/parser/GraphPatternOperation.cpp | 145 ++++ src/parser/GraphPatternOperation.h | 88 ++- src/parser/TripleComponent.h | 6 + .../sparqlParser/SparqlQleverVisitor.cpp | 39 +- src/parser/sparqlParser/SparqlQleverVisitor.h | 6 +- test/CMakeLists.txt | 2 + test/PathSearchTest.cpp | 722 ++++++++++++++++++ test/QueryPlannerTest.cpp | 700 +++++++++++++++++ test/QueryPlannerTestHelpers.h | 34 + 17 files changed, 2925 insertions(+), 8 deletions(-) create mode 100644 docs/path_search.md create mode 100644 src/engine/PathSearch.cpp create mode 100644 src/engine/PathSearch.h create mode 100644 test/PathSearchTest.cpp diff --git a/docs/path_search.md b/docs/path_search.md new file mode 100644 index 0000000000..10ae4e0f51 --- /dev/null +++ b/docs/path_search.md @@ -0,0 +1,290 @@ +# Path Search Feature Documentation for SPARQL Engine + +## Overview + +The Path Search feature in this SPARQL engine allows users to perform advanced queries +to find paths between sources and targets in a graph. It supports a variety of configurations, +including single or multiple source and target nodes, optional edge properties, and +custom algorithms for path discovery. This feature is accessed using the `SERVICE` keyword +and the service IRI ``. 
+ +## Basic Syntax + +The general structure of a Path Search query is as follows: + +```sparql +PREFIX pathSearch: + +SELECT ?start ?end ?path ?edge WHERE { + SERVICE pathSearch: { + _:path pathSearch:algorithm pathSearch:allPaths ; # Specify the algorithm + pathSearch:source ; # Specify the source node(s) + pathSearch:target ; # Specify the target node(s) + pathSearch:pathColumn ?path ; # Bind the path variable + pathSearch:edgeColumn ?edge ; # Bind the edge variable + pathSearch:start ?start ; # Bind the edge start variable + pathSearch:end ?end ; # Bind the edge end variable + {SELECT * WHERE { + ?start ?end. # Define the edge pattern + }} + } +} +``` + +### Parameters + +- **pathSearch:algorithm**: Defines the algorithm used to search paths. Currently, only `pathSearch:allPaths` is supported. +- **pathSearch:source**: Defines the source node(s) of the search. +- **pathSearch:target** (optional): Defines the target node(s) of the search. +- **pathSearch:pathColumn**: Defines the variable for the path. +- **pathSearch:edgeColumn**: Defines the variable for the edge. +- **pathSearch:start**: Defines the variable for the start of the edges. +- **pathSearch:end**: Defines the variable for the end of the edges. +- **pathSearch:edgeProperty** (optional): Specifies properties for the edges in the path. +- **pathSearch:cartesian** (optional): Controls the behaviour of path searches between + source and target nodes. Expects a boolean. The default is `true`. + - If set to `true`, the search will compute the paths from each source to **all targets** + - If set to `false`, the search will compute the paths from each source to exactly + **one target**. Sources and targets are paired based on their index (i.e. the paths + from the first source to the first target are searched, then the second source and + target, and so on). 
+ + +### Example 1: Single Source and Target + +The simplest case is searching for paths between a single source and a single target: + +```sparql +PREFIX pathSearch: + +SELECT ?start ?end ?path ?edge WHERE { + SERVICE pathSearch: { + _:path pathSearch:algorithm pathSearch:allPaths ; + pathSearch:source ; + pathSearch:target ; + pathSearch:pathColumn ?path ; + pathSearch:edgeColumn ?edge ; + pathSearch:start ?start ; + pathSearch:end ?end ; + { + SELECT * WHERE { + ?start ?end. + } + } + } +} +``` + +### Example 2: Multiple Sources or Targets + +It is possible to specify a set of sources or targets for the path search. + +```sparql +PREFIX pathSearch: + +SELECT ?start ?end ?path ?edge WHERE { + SERVICE pathSearch: { + _:path pathSearch:algorithm pathSearch:allPaths ; + pathSearch:source ; + pathSearch:source ; + pathSearch:target ; + pathSearch:target ; + pathSearch:pathColumn ?path ; + pathSearch:edgeColumn ?edge ; + pathSearch:start ?start ; + pathSearch:end ?end ; + { + SELECT * WHERE { + ?start ?end. + } + } + } +} +``` + +This query will search for all paths between all sources and all targets, i.e. +- (``, ``) +- (``, ``) +- (``, ``) +- (``, ``) + +It is possible to specify whether the sources and targets should be combined according +to the cartesian product (as seen above) or if they should be matched up pairwise, i.e. +- (``, ``) +- (``, ``) + +This can be done with the parameter `pathSearch:cartesian`. This parameter expects a +boolean. If set to `true`, then the cartesian product is used to match the sources with +the targets. +If set to `false`, then the sources and targets are matched pairwise. If left +unspecified, then the default `true` is used. 
+ +```sparql +PREFIX pathSearch: + +SELECT ?start ?end ?path ?edge WHERE { + SERVICE pathSearch: { + _:path pathSearch:algorithm pathSearch:allPaths ; + pathSearch:source ; + pathSearch:source ; + pathSearch:target ; + pathSearch:target ; + pathSearch:pathColumn ?path ; + pathSearch:edgeColumn ?edge ; + pathSearch:start ?start ; + pathSearch:end ?end ; + pathSearch:cartesian false; + { + SELECT * WHERE { + ?start ?end. + } + } + } +} +``` + +### Example 3: Edge Properties + +You can also include edge properties in the path search to further refine the results: + +```sparql +PREFIX pathSearch: + +SELECT ?start ?end ?path ?edge WHERE { + SERVICE pathSearch: { + _:path pathSearch:algorithm pathSearch:allPaths ; + pathSearch:source ; + pathSearch:target ; + pathSearch:pathColumn ?path ; + pathSearch:edgeColumn ?edge ; + pathSearch:edgeProperty ?middle ; + pathSearch:start ?start ; + pathSearch:end ?end ; + { + SELECT * WHERE { + ?start ?middle. + ?middle ?end. + } + } + } +} +``` + +This is especially useful for [N-ary relations](https://www.w3.org/TR/swbp-n-aryRelations/). +Considering the example above, it is possible to query additional relations of `?middle`: + +```sparql +PREFIX pathSearch: + +SELECT ?start ?end ?path ?edge WHERE { + SERVICE pathSearch: { + _:path pathSearch:algorithm pathSearch:allPaths ; + pathSearch:source ; + pathSearch:target ; + pathSearch:pathColumn ?path ; + pathSearch:edgeColumn ?edge ; + pathSearch:edgeProperty ?middle ; + pathSearch:edgeProperty ?edgeInfo ; + pathSearch:start ?start ; + pathSearch:end ?end ; + { + SELECT * WHERE { + ?start ?middle. + ?middle ?end. + ?middle ?edgeInfo. + } + } + } +} +``` + +This makes it possible to query additional properties of the edge between `?start` and `?end` (such as `?edgeInfo` in the example above). + + +### Example 4: Source or Target as Variables + +You can also bind the source and/or target dynamically using variables. 
The examples +below use `VALUES` clauses, which can be convenient to specify sources and targets. +However, the source/target variables can also be bound using any regular SPARQL construct. + +#### Source Variable + +```sparql +PREFIX pathSearch: + +SELECT ?start ?end ?path ?edge WHERE { + VALUES ?source {} + SERVICE pathSearch: { + _:path pathSearch:algorithm pathSearch:allPaths ; + pathSearch:source ?source ; + pathSearch:target ; + pathSearch:pathColumn ?path ; + pathSearch:edgeColumn ?edge ; + pathSearch:start ?start ; + pathSearch:end ?end ; + { + SELECT * WHERE { + ?start

?end. + } + } + } +} +``` + +#### Target Variable + +```sparql +PREFIX pathSearch: + +SELECT ?start ?end ?path ?edge WHERE { + VALUES ?target {} + SERVICE pathSearch: { + _:path pathSearch:algorithm pathSearch:allPaths ; + pathSearch:source ; + pathSearch:target ?target ; + pathSearch:pathColumn ?path ; + pathSearch:edgeColumn ?edge ; + pathSearch:start ?start ; + pathSearch:end ?end ; + { + SELECT * WHERE { + ?start

?end. + } + } + } +} +``` + +## Error Handling + +The Path Search feature will throw errors in the following scenarios: + +- **Missing Start Parameter**: If the `start` parameter is not specified, an error will be raised. +- **Multiple Start or End Variables**: If multiple `start` or `end` variables are defined, an error is raised. +- **Invalid Non-Variable Start/End**: If the `start` or `end` parameter is not bound to a variable, the query will fail. +- **Unsupported Argument**: Arguments other than those listed (like custom user arguments) will cause an error. +- **Non-IRI Predicate**: Predicates must be IRIs. If not, an error will occur. + +### Example: Missing Start Parameter + +```sparql +PREFIX pathSearch: +SELECT ?start ?end ?path ?edge WHERE { + SERVICE pathSearch: { + _:path pathSearch:algorithm pathSearch:allPaths ; + pathSearch:source ; + pathSearch:target ; + pathSearch:pathColumn ?path ; + pathSearch:edgeColumn ?edge ; + pathSearch:end ?end ; # Missing start + { + SELECT * WHERE { + ?start

?end. + } + } + } +} +``` + +This query would fail with a "Missing parameter 'start'" error. + diff --git a/e2e/scientists_queries.yaml b/e2e/scientists_queries.yaml index a945eeb574..1fc78430be 100644 --- a/e2e/scientists_queries.yaml +++ b/e2e/scientists_queries.yaml @@ -1017,6 +1017,34 @@ queries: - contains_row: [""] - contains_row: ["1.87"] + - query: path_search_all_paths + type: no-text + sparql: | + PREFIX pathSearch: + SELECT * WHERE { + SERVICE pathSearch: { + pathSearch: pathSearch:algorithm pathSearch:allPaths; + pathSearch:source ; + pathSearch:target ; + pathSearch:pathColumn ?path; + pathSearch:edgeColumn ?edge; + pathSearch:start ?start; + pathSearch:end ?end; + {SELECT * WHERE { + ?start ?end + }} + } + } + checks: + - num_rows: 17 + - num_cols: 4 + - selected: ["?path", "?edge", "?start", "?end"] + - contains_row: ["0", "0", "", ""] + - contains_row: ["0", "1", "", ""] + - contains_row: ["0", "2", "", ""] + - contains_row: ["4", "0", "", ""] + - contains_row: ["4", "1", "", ""] + - query : property_path_inverse type: no-text diff --git a/src/engine/CMakeLists.txt b/src/engine/CMakeLists.txt index 0f37137531..41a9a33a68 100644 --- a/src/engine/CMakeLists.txt +++ b/src/engine/CMakeLists.txt @@ -13,5 +13,5 @@ add_library(engine VariableToColumnMap.cpp ExportQueryExecutionTrees.cpp CartesianProductJoin.cpp TextIndexScanForWord.cpp TextIndexScanForEntity.cpp TextLimit.cpp LazyGroupBy.cpp GroupByHashMapOptimization.cpp SpatialJoin.cpp - CountConnectedSubgraphs.cpp SpatialJoinAlgorithms.cpp) + CountConnectedSubgraphs.cpp SpatialJoinAlgorithms.cpp PathSearch.cpp) qlever_target_link_libraries(engine util index parser sparqlExpressions http SortPerformanceEstimator Boost::iostreams s2) diff --git a/src/engine/CheckUsePatternTrick.cpp b/src/engine/CheckUsePatternTrick.cpp index fd153adc0d..9bfe8c4734 100644 --- a/src/engine/CheckUsePatternTrick.cpp +++ b/src/engine/CheckUsePatternTrick.cpp @@ -69,7 +69,8 @@ bool isVariableContainedInGraphPatternOperation( } 
else if constexpr (std::is_same_v) { return ad_utility::contains(arg.visibleVariables_, variable); } else { - static_assert(std::is_same_v); + static_assert(std::is_same_v || + std::is_same_v); // The `TransPath` is set up later in the query planning, when this // function should not be called anymore. AD_FAIL(); diff --git a/src/engine/PathSearch.cpp b/src/engine/PathSearch.cpp new file mode 100644 index 0000000000..50f10210a6 --- /dev/null +++ b/src/engine/PathSearch.cpp @@ -0,0 +1,460 @@ +// Copyright 2024, University of Freiburg, +// Chair of Algorithms and Data Structures. +// Author: Johannes Herrmann (johannes.r.herrmann(at)gmail.com) + +#include "PathSearch.h" + +#include +#include +#include +#include +#include +#include +#include + +#include "engine/CallFixedSize.h" +#include "engine/QueryExecutionTree.h" +#include "engine/VariableToColumnMap.h" +#include "util/AllocatorWithLimit.h" + +using namespace pathSearch; + +// _____________________________________________________________________________ +BinSearchWrapper::BinSearchWrapper(const IdTable& table, size_t startCol, + size_t endCol, std::vector edgeCols) + : table_(table), + startCol_(startCol), + endCol_(endCol), + edgeCols_(std::move(edgeCols)) {} + +// _____________________________________________________________________________ +std::vector BinSearchWrapper::outgoingEdes(const Id node) const { + auto startIds = table_.getColumn(startCol_); + auto range = std::ranges::equal_range(startIds, node); + auto startIndex = std::distance(startIds.begin(), range.begin()); + + std::vector edges; + for (size_t i = 0; i < range.size(); i++) { + auto row = startIndex + i; + auto edge = makeEdgeFromRow(row); + edges.push_back(edge); + } + return edges; +} + +// _____________________________________________________________________________ +std::vector BinSearchWrapper::getSources() const { + auto startIds = table_.getColumn(startCol_); + std::vector sources; + std::ranges::unique_copy(startIds, 
std::back_inserter(sources)); + + return sources; +} + +// _____________________________________________________________________________ +std::vector BinSearchWrapper::getEdgeProperties(const Edge& edge) const { + std::vector edgeProperties; + for (auto edgeCol : edgeCols_) { + edgeProperties.push_back(table_(edge.edgeRow_, edgeCol)); + } + return edgeProperties; +} + +// _____________________________________________________________________________ +Edge BinSearchWrapper::makeEdgeFromRow(size_t row) const { + Edge edge; + edge.start_ = table_(row, startCol_); + edge.end_ = table_(row, endCol_); + edge.edgeRow_ = row; + + return edge; +} + +// _____________________________________________________________________________ +PathSearch::PathSearch(QueryExecutionContext* qec, + std::shared_ptr subtree, + PathSearchConfiguration config) + : Operation(qec), subtree_(std::move(subtree)), config_(std::move(config)) { + AD_CORRECTNESS_CHECK(qec != nullptr); + + auto startCol = subtree_->getVariableColumn(config_.start_); + auto endCol = subtree_->getVariableColumn(config_.end_); + subtree_ = QueryExecutionTree::createSortedTree(subtree_, {startCol, endCol}); + + resultWidth_ = 4 + config_.edgeProperties_.size(); + + size_t colIndex = 0; + + variableColumns_[config_.start_] = makeAlwaysDefinedColumn(colIndex); + colIndex++; + variableColumns_[config_.end_] = makeAlwaysDefinedColumn(colIndex); + colIndex++; + variableColumns_[config_.pathColumn_] = makeAlwaysDefinedColumn(colIndex); + colIndex++; + variableColumns_[config_.edgeColumn_] = makeAlwaysDefinedColumn(colIndex); + colIndex++; + + if (std::holds_alternative(config_.sources_)) { + resultWidth_++; + const auto& sourceColumn = std::get(config_.sources_); + variableColumns_[sourceColumn] = makeAlwaysDefinedColumn(colIndex); + colIndex++; + } + + if (std::holds_alternative(config_.targets_)) { + resultWidth_++; + const auto& targetColumn = std::get(config_.targets_); + variableColumns_[targetColumn] = 
makeAlwaysDefinedColumn(colIndex); + colIndex++; + } + + for (const auto& edgeProperty : config_.edgeProperties_) { + auto subVarCols = subtree_->getVariableColumns(); + auto colInfo = subVarCols[edgeProperty]; + variableColumns_[edgeProperty] = {colIndex, colInfo.mightContainUndef_}; + colIndex++; + } +} + +// _____________________________________________________________________________ +std::vector PathSearch::getChildren() { + std::vector res; + res.push_back(subtree_.get()); + + if (sourceAndTargetTree_.has_value()) { + res.push_back(sourceAndTargetTree_.value().get()); + } else { + if (sourceTree_.has_value()) { + res.push_back(sourceTree_.value().get()); + } + + if (targetTree_.has_value()) { + res.push_back(targetTree_.value().get()); + } + } + + return res; +}; + +// _____________________________________________________________________________ +std::string PathSearch::getCacheKeyImpl() const { + std::ostringstream os; + os << "PathSearch:\n"; + os << config_.toString(); + + AD_CORRECTNESS_CHECK(subtree_); + os << "Subtree:\n" << subtree_->getCacheKey() << '\n'; + + if (sourceTree_.has_value()) { + os << "Source Side subtree:\n" + << sourceTree_.value()->getCacheKey() << '\n'; + } + + if (targetTree_.has_value()) { + os << "Target Side subtree:\n" + << targetTree_.value()->getCacheKey() << '\n'; + } + + if (sourceAndTargetTree_.has_value()) { + os << "Source And Target Side subtree:\n" + << sourceAndTargetTree_.value()->getCacheKey() << '\n'; + } + + return std::move(os).str(); +}; + +// _____________________________________________________________________________ +string PathSearch::getDescriptor() const { + std::ostringstream os; + os << "PathSearch"; + return std::move(os).str(); +}; + +// _____________________________________________________________________________ +size_t PathSearch::getResultWidth() const { return resultWidth_; }; + +// _____________________________________________________________________________ +size_t PathSearch::getCostEstimate() { 
+ // TODO: Figure out a smart way to estimate cost + return 1000; +}; + +// _____________________________________________________________________________ +uint64_t PathSearch::getSizeEstimateBeforeLimit() { + // TODO: Figure out a smart way to estimate size + return 1000; +}; + +// _____________________________________________________________________________ +float PathSearch::getMultiplicity(size_t col) { + (void)col; + return 1; +}; + +// _____________________________________________________________________________ +bool PathSearch::knownEmptyResult() { + for (auto child : getChildren()) { + if (child->knownEmptyResult()) { + return true; + } + } + return false; +}; + +// _____________________________________________________________________________ +vector PathSearch::resultSortedOn() const { return {}; }; + +// _____________________________________________________________________________ +void PathSearch::bindSourceSide(std::shared_ptr sourcesOp, + size_t inputCol) { + sourceTree_ = sourcesOp; + sourceCol_ = inputCol; +} + +// _____________________________________________________________________________ +void PathSearch::bindTargetSide(std::shared_ptr targetsOp, + size_t inputCol) { + targetTree_ = targetsOp; + targetCol_ = inputCol; +} + +// _____________________________________________________________________________ +void PathSearch::bindSourceAndTargetSide( + std::shared_ptr sourceAndTargetOp, size_t sourceCol, + size_t targetCol) { + sourceAndTargetTree_ = sourceAndTargetOp; + sourceCol_ = sourceCol; + targetCol_ = targetCol; +} + +// _____________________________________________________________________________ +Result PathSearch::computeResult([[maybe_unused]] bool requestLaziness) { + std::shared_ptr subRes = subtree_->getResult(); + IdTable idTable{allocator()}; + idTable.setNumColumns(getResultWidth()); + + const IdTable& dynSub = subRes->idTable(); + if (!dynSub.empty()) { + auto timer = ad_utility::Timer(ad_utility::Timer::Started); + + auto 
subStartColumn = subtree_->getVariableColumn(config_.start_); + auto subEndColumn = subtree_->getVariableColumn(config_.end_); + std::vector edgeColumns; + for (const auto& edgeProp : config_.edgeProperties_) { + edgeColumns.push_back(subtree_->getVariableColumn(edgeProp)); + } + BinSearchWrapper binSearch{dynSub, subStartColumn, subEndColumn, + std::move(edgeColumns)}; + + timer.stop(); + auto buildingTime = timer.msecs(); + timer.start(); + + auto [sources, targets] = handleSearchSides(); + + timer.stop(); + auto sideTime = timer.msecs(); + timer.start(); + + PathsLimited paths{allocator()}; + std::vector allSources; + if (sources.empty()) { + allSources = binSearch.getSources(); + sources = allSources; + } + paths = allPaths(sources, targets, binSearch, config_.cartesian_); + + timer.stop(); + auto searchTime = timer.msecs(); + timer.start(); + + CALL_FIXED_SIZE(std::array{getResultWidth()}, + &PathSearch::pathsToResultTable, this, idTable, paths, + binSearch); + + timer.stop(); + auto fillTime = timer.msecs(); + timer.start(); + + auto& info = runtimeInfo(); + info.addDetail("Time to build graph & mapping", buildingTime.count()); + info.addDetail("Time to prepare search sides", sideTime.count()); + info.addDetail("Time to search paths", searchTime.count()); + info.addDetail("Time to fill result table", fillTime.count()); + } + + return {std::move(idTable), resultSortedOn(), subRes->getSharedLocalVocab()}; +}; + +// _____________________________________________________________________________ +VariableToColumnMap PathSearch::computeVariableToColumnMap() const { + return variableColumns_; +}; + +// _____________________________________________________________________________ +std::pair, std::span> +PathSearch::handleSearchSides() const { + std::span sourceIds; + std::span targetIds; + + if (sourceAndTargetTree_.has_value()) { + auto resultTable = sourceAndTargetTree_.value()->getResult(); + sourceIds = resultTable->idTable().getColumn(sourceCol_.value()); + 
targetIds = resultTable->idTable().getColumn(targetCol_.value()); + return {sourceIds, targetIds}; + } + + if (sourceTree_.has_value()) { + sourceIds = sourceTree_.value()->getResult()->idTable().getColumn( + sourceCol_.value()); + } else if (config_.sourceIsVariable()) { + sourceIds = {}; + } else { + sourceIds = std::get>(config_.sources_); + } + + if (targetTree_.has_value()) { + targetIds = targetTree_.value()->getResult()->idTable().getColumn( + targetCol_.value()); + } else if (config_.targetIsVariable()) { + targetIds = {}; + } else { + targetIds = std::get>(config_.targets_); + } + + return {sourceIds, targetIds}; +} + +// _____________________________________________________________________________ +PathsLimited PathSearch::findPaths(const Id& source, + const std::unordered_set& targets, + const BinSearchWrapper& binSearch) const { + std::vector edgeStack; + Path currentPath{EdgesLimited(allocator())}; + std::unordered_map< + uint64_t, PathsLimited, std::hash, std::equal_to, + ad_utility::AllocatorWithLimit>> + pathCache{allocator()}; + PathsLimited result{allocator()}; + std::unordered_set, std::equal_to, + ad_utility::AllocatorWithLimit> + visited{allocator()}; + + visited.insert(source.getBits()); + for (auto edge : binSearch.outgoingEdes(source)) { + edgeStack.push_back(std::move(edge)); + } + + while (!edgeStack.empty()) { + checkCancellation(); + auto edge = edgeStack.back(); + edgeStack.pop_back(); + + visited.insert(edge.end_.getBits()); + + while (!currentPath.empty() && edge.start_ != currentPath.end()) { + visited.erase(currentPath.end().getBits()); + currentPath.pop_back(); + } + + currentPath.push_back(edge); + + if (targets.empty() || targets.contains(edge.end_.getBits())) { + result.push_back(currentPath); + } + + for (const auto& outgoingEdge : binSearch.outgoingEdes(edge.end_)) { + if (!visited.contains(outgoingEdge.end_.getBits())) { + edgeStack.push_back(outgoingEdge); + } + } + } + + return result; +} + +// 
_____________________________________________________________________________ +PathsLimited PathSearch::allPaths(std::span sources, + std::span targets, + const BinSearchWrapper& binSearch, + bool cartesian) const { + PathsLimited paths{allocator()}; + Path path{EdgesLimited(allocator())}; + + if (cartesian || sources.size() != targets.size()) { + std::unordered_set targetSet; + for (auto target : targets) { + targetSet.insert(target.getBits()); + } + for (auto source : sources) { + for (const auto& path : findPaths(source, targetSet, binSearch)) { + paths.push_back(path); + } + } + } else { + for (size_t i = 0; i < sources.size(); i++) { + for (const auto& path : + findPaths(sources[i], {targets[i].getBits()}, binSearch)) { + paths.push_back(path); + } + } + } + return paths; +} + +// _____________________________________________________________________________ +template +void PathSearch::pathsToResultTable(IdTable& tableDyn, PathsLimited& paths, + const BinSearchWrapper& binSearch) const { + IdTableStatic table = std::move(tableDyn).toStatic(); + + std::vector edgePropertyCols; + for (const auto& edgeVar : config_.edgeProperties_) { + auto edgePropertyCol = variableColumns_.at(edgeVar).columnIndex_; + edgePropertyCols.push_back(edgePropertyCol); + } + + size_t rowIndex = 0; + for (size_t pathIndex = 0; pathIndex < paths.size(); pathIndex++) { + auto path = paths[pathIndex]; + + std::optional sourceId = std::nullopt; + if (config_.sourceIsVariable()) { + sourceId = path.edges_.front().start_; + } + + std::optional targetId = std::nullopt; + if (config_.targetIsVariable()) { + targetId = path.edges_.back().end_; + } + + for (size_t edgeIndex = 0; edgeIndex < path.size(); edgeIndex++) { + checkCancellation(); + auto edge = path.edges_[edgeIndex]; + table.emplace_back(); + table(rowIndex, getStartIndex()) = edge.start_; + table(rowIndex, getEndIndex()) = edge.end_; + table(rowIndex, getPathIndex()) = Id::makeFromInt(pathIndex); + table(rowIndex, getEdgeIndex()) = 
Id::makeFromInt(edgeIndex); + + if (sourceId) { + table(rowIndex, getSourceIndex().value()) = sourceId.value(); + } + + if (targetId) { + table(rowIndex, getTargetIndex().value()) = targetId.value(); + } + + auto edgeProperties = binSearch.getEdgeProperties(edge); + for (size_t edgePropertyIndex = 0; + edgePropertyIndex < edgeProperties.size(); edgePropertyIndex++) { + table(rowIndex, edgePropertyCols[edgePropertyIndex]) = + edgeProperties[edgePropertyIndex]; + } + + rowIndex++; + } + } + + tableDyn = std::move(table).toDynamic(); +} diff --git a/src/engine/PathSearch.h b/src/engine/PathSearch.h new file mode 100644 index 0000000000..9e330d1d4e --- /dev/null +++ b/src/engine/PathSearch.h @@ -0,0 +1,282 @@ +// Copyright 2024, University of Freiburg, +// Chair of Algorithms and Data Structures. +// Author: Johannes Herrmann (johannes.r.herrmann(at)gmail.com) + +#pragma once + +#include +#include +#include +#include +#include + +#include "engine/Operation.h" +#include "global/Id.h" +#include "util/AllocatorWithLimit.h" + +enum class PathSearchAlgorithm { ALL_PATHS }; + +/** + * @brief Represents the source or target side of a PathSearch. + * The side can either be a variable or a list of Ids. + */ +using SearchSide = std::variant>; + +namespace pathSearch { +struct Edge { + Id start_; + + Id end_; + + size_t edgeRow_; +}; + +using EdgesLimited = std::vector>; + +struct Path { + EdgesLimited edges_; + + bool empty() const { return edges_.empty(); } + + size_t size() const { return edges_.size(); } + + void push_back(const Edge& edge) { edges_.push_back(edge); } + + void pop_back() { edges_.pop_back(); } + + const Id& end() { return edges_.back().end_; } +}; + +using PathsLimited = std::vector>; + +/** + * @class BinSearchWrapper + * @brief Encapsulates logic for binary search of edges in + * an IdTable. 
It provides methods to find outgoing edges from + * a node and to retrieve the properties of an edge. + * + */ +class BinSearchWrapper { + const IdTable& table_; + size_t startCol_; + size_t endCol_; + std::vector edgeCols_; + + public: + BinSearchWrapper(const IdTable& table, size_t startCol, size_t endCol, + std::vector edgeCols); + + /** + * @brief Return all outgoing edges of a node + * + * @param node The start node of the outgoing edges + */ + std::vector outgoingEdes(const Id node) const; + + /** + * @brief Returns the start nodes of all edges. + * In case the sources field for the path search is empty, + * the search starts from all possible sources (i.e. all + * start nodes). Returns only unique start nodes. + */ + std::vector getSources() const; + + std::vector getEdgeProperties(const Edge& edge) const; + + private: + Edge makeEdgeFromRow(size_t row) const; +}; +} // namespace pathSearch + +struct PathSearchConfiguration { + PathSearchAlgorithm algorithm_; + SearchSide sources_; + SearchSide targets_; + Variable start_; + Variable end_; + Variable pathColumn_; + Variable edgeColumn_; + std::vector edgeProperties_; + bool cartesian_ = true; + + bool sourceIsVariable() const { + return std::holds_alternative(sources_); + } + bool targetIsVariable() const { + return std::holds_alternative(targets_); + } + + std::string searchSideToString(const SearchSide& side) const { + if (std::holds_alternative(side)) { + return std::get(side).toSparql(); + } + std::ostringstream os; + for (auto id : std::get>(side)) { + os << id << ", "; + } + return std::move(os).str(); + } + + std::string toString() const { + std::ostringstream os; + if (algorithm_ == PathSearchAlgorithm::ALL_PATHS) { + os << "Algorithm: All paths" << '\n'; + } + + os << "Source: " << searchSideToString(sources_) << '\n'; + os << "Target: " << searchSideToString(targets_) << '\n'; + + os << "Start: " << start_.toSparql() << '\n'; + os << "End: " << end_.toSparql() << '\n'; + os << "PathColumn: " << pathColumn_.toSparql() << '\n'; + 
os << "EdgeColumn: " << edgeColumn_.toSparql() << '\n'; + + os << "EdgeProperties:" << '\n'; + for (const auto& edgeProperty : edgeProperties_) { + os << " " << edgeProperty.toSparql() << '\n'; + } + + return std::move(os).str(); + } +}; + +/** + * @class PathSearch + * @brief Main class implementing the path search operation. + * It manages the configuration, executes the search and + * builds the ResultTable. + * + */ +class PathSearch : public Operation { + std::shared_ptr subtree_; + size_t resultWidth_; + VariableToColumnMap variableColumns_; + + PathSearchConfiguration config_; + + // The following optional fields are filled, depending + // on how the PathSearch is bound. + std::optional sourceCol_; + std::optional targetCol_; + + std::optional> sourceTree_; + std::optional> targetTree_; + std::optional> sourceAndTargetTree_; + + public: + PathSearch(QueryExecutionContext* qec, + std::shared_ptr subtree, + PathSearchConfiguration config); + + std::vector getChildren() override; + + const PathSearchConfiguration& getConfig() const { return config_; } + + ColumnIndex getStartIndex() const { + return variableColumns_.at(config_.start_).columnIndex_; + } + ColumnIndex getEndIndex() const { + return variableColumns_.at(config_.end_).columnIndex_; + } + ColumnIndex getPathIndex() const { + return variableColumns_.at(config_.pathColumn_).columnIndex_; + } + ColumnIndex getEdgeIndex() const { + return variableColumns_.at(config_.edgeColumn_).columnIndex_; + } + std::optional getSourceIndex() const { + if (!config_.sourceIsVariable()) { + return std::nullopt; + } + const auto& sourceVar = std::get(config_.sources_); + return variableColumns_.at(sourceVar).columnIndex_; + } + std::optional getTargetIndex() const { + if (!config_.targetIsVariable()) { + return std::nullopt; + } + const auto& targetVar = std::get(config_.targets_); + return variableColumns_.at(targetVar).columnIndex_; + } + + string getCacheKeyImpl() const override; + string getDescriptor() const 
override; + size_t getResultWidth() const override; + + size_t getCostEstimate() override; + + uint64_t getSizeEstimateBeforeLimit() override; + float getMultiplicity(size_t col) override; + bool knownEmptyResult() override; + + vector resultSortedOn() const override; + + void bindSourceSide(std::shared_ptr sourcesOp, + size_t inputCol); + void bindTargetSide(std::shared_ptr targetsOp, + size_t inputCol); + + void bindSourceAndTargetSide( + std::shared_ptr sourceAndTargetOp, size_t sourceCol, + size_t targetCol); + + bool isSourceBound() const { + return sourceTree_.has_value() || sourceAndTargetTree_.has_value() || + !config_.sourceIsVariable(); + } + + bool isTargetBound() const { + return targetTree_.has_value() || sourceAndTargetTree_.has_value() || + !config_.targetIsVariable(); + } + + std::optional getSourceColumn() const { + if (!config_.sourceIsVariable()) { + return std::nullopt; + } + + return variableColumns_.at(std::get(config_.sources_)) + .columnIndex_; + } + + std::optional getTargetColumn() const { + if (!config_.targetIsVariable()) { + return std::nullopt; + } + + return variableColumns_.at(std::get(config_.targets_)) + .columnIndex_; + } + + Result computeResult([[maybe_unused]] bool requestLaziness) override; + VariableToColumnMap computeVariableToColumnMap() const override; + + private: + std::pair, std::span> handleSearchSides() const; + + /** + * @brief Finds paths based on the configured algorithm. + * @return A vector of paths. + */ + pathSearch::PathsLimited findPaths( + const Id& source, const std::unordered_set& targets, + const pathSearch::BinSearchWrapper& binSearch) const; + + /** + * @brief Finds all paths in the graph. + * @return A vector of all paths. + */ + pathSearch::PathsLimited allPaths( + std::span sources, std::span targets, + const pathSearch::BinSearchWrapper& binSearch, bool cartesian) const; + + /** + * @brief Converts paths to a result table with a specified width. + * @tparam WIDTH The width of the result table. 
+ * @param tableDyn The dynamic table to store the results. + * @param paths The vector of paths to convert. + */ + template + void pathsToResultTable(IdTable& tableDyn, pathSearch::PathsLimited& paths, + const pathSearch::BinSearchWrapper& binSearch) const; +}; diff --git a/src/engine/QueryPlanner.cpp b/src/engine/QueryPlanner.cpp index aeb247d45e..b63958e198 100644 --- a/src/engine/QueryPlanner.cpp +++ b/src/engine/QueryPlanner.cpp @@ -9,6 +9,7 @@ #include #include +#include #include "engine/Bind.h" #include "engine/CartesianProductJoin.h" @@ -26,6 +27,7 @@ #include "engine/NeutralElementOperation.h" #include "engine/OptionalJoin.h" #include "engine/OrderBy.h" +#include "engine/PathSearch.h" #include "engine/Service.h" #include "engine/Sort.h" #include "engine/SpatialJoin.h" @@ -37,9 +39,11 @@ #include "engine/Values.h" #include "engine/sparqlExpressions/LiteralExpression.h" #include "engine/sparqlExpressions/RelationalExpressions.h" +#include "global/Id.h" #include "global/RuntimeParameters.h" #include "parser/Alias.h" #include "parser/SparqlParserHelpers.h" +#include "util/Exception.h" namespace p = parsedQuery; namespace { @@ -1820,6 +1824,11 @@ std::vector QueryPlanner::createJoinCandidates( return {makeSubtreePlan(_qec, a._qet, b._qet)}; } + if (auto opt = createJoinWithPathSearch(a, b, jcs)) { + candidates.push_back(std::move(opt.value())); + return candidates; + } + // Check if one of the two Operations is a SERVICE. If so, we can try // to simplify the Service Query using the result of the other operation. 
if (auto opt = createJoinWithService(a, b, jcs)) { @@ -2064,6 +2073,98 @@ auto QueryPlanner::createSubtreeWithService(const SubtreePlan& a, return plan; } +// _____________________________________________________________________ +auto QueryPlanner::createJoinWithPathSearch( + const SubtreePlan& a, const SubtreePlan& b, + const std::vector>& jcs) + -> std::optional { + auto aRootOp = + std::dynamic_pointer_cast(a._qet->getRootOperation()); + auto bRootOp = + std::dynamic_pointer_cast(b._qet->getRootOperation()); + + // Exactly one of the two Operations can be a path search. + if (static_cast(aRootOp) == static_cast(bRootOp)) { + return std::nullopt; + } + + auto pathSearch = aRootOp ? aRootOp : bRootOp; + auto sibling = bRootOp ? a : b; + + auto decideColumns = [aRootOp](std::array joinColumns) + -> std::pair { + auto thisCol = aRootOp ? joinColumns[0] : joinColumns[1]; + auto otherCol = aRootOp ? joinColumns[1] : joinColumns[0]; + return {thisCol, otherCol}; + }; + + // Only source and target may be bound directly + if (jcs.size() > 2) { + return std::nullopt; + } + + auto sourceColumn = pathSearch->getSourceColumn(); + auto targetColumn = pathSearch->getTargetColumn(); + + // Either source or target column have to be a variable to create a join + if (!sourceColumn && !targetColumn) { + return std::nullopt; + } + + // A join on an edge property column should not create any candidates + auto isJoinOnSourceOrTarget = [sourceColumn, + targetColumn](size_t joinColumn) { + return ((sourceColumn && sourceColumn.value() == joinColumn) || + (targetColumn && targetColumn.value() == joinColumn)); + }; + + if (jcs.size() == 2) { + // To join source and target, both must be variables + if (!sourceColumn || !targetColumn) { + return std::nullopt; + } + + auto [firstCol, firstOtherCol] = decideColumns(jcs[0]); + + auto [secondCol, secondOtherCol] = decideColumns(jcs[1]); + + if (!isJoinOnSourceOrTarget(firstCol) && + !isJoinOnSourceOrTarget(secondCol)) { + return std::nullopt; + 
} + + if (sourceColumn == firstCol && targetColumn == secondCol) { + pathSearch->bindSourceAndTargetSide(sibling._qet, firstOtherCol, + secondOtherCol); + } else if (sourceColumn == secondCol && targetColumn == firstCol) { + pathSearch->bindSourceAndTargetSide(sibling._qet, secondOtherCol, + firstOtherCol); + } else { + return std::nullopt; + } + } else if (jcs.size() == 1) { + auto [thisCol, otherCol] = decideColumns(jcs[0]); + + if (!isJoinOnSourceOrTarget(thisCol)) { + return std::nullopt; + } + + if (sourceColumn && sourceColumn == thisCol && + !pathSearch->isSourceBound()) { + pathSearch->bindSourceSide(sibling._qet, otherCol); + } else if (targetColumn && targetColumn == thisCol && + !pathSearch->isTargetBound()) { + pathSearch->bindTargetSide(sibling._qet, otherCol); + } + } else { + return std::nullopt; + } + + SubtreePlan plan = makeSubtreePlan(pathSearch); + mergeSubtreePlanIds(plan, a, b); + return plan; +} + // _____________________________________________________________________ void QueryPlanner::QueryGraph::setupGraph( const std::vector& leafOperations) { @@ -2289,6 +2390,8 @@ void QueryPlanner::GraphPatternPlanner::graphPatternOperationVisitor(Arg& arg) { c.type = SubtreePlan::MINUS; } visitGroupOptionalOrMinus(std::move(candidates)); + } else if constexpr (std::is_same_v) { + visitPathSearch(arg); } else { static_assert(std::is_same_v); visitBasicGraphPattern(arg); @@ -2396,6 +2499,24 @@ void QueryPlanner::GraphPatternPlanner::visitTransitivePath( visitGroupOptionalOrMinus(std::move(candidatesOut)); } +// _______________________________________________________________ +void QueryPlanner::GraphPatternPlanner::visitPathSearch( + parsedQuery::PathQuery& pathQuery) { + auto candidatesIn = planner_.optimize(&pathQuery.childGraphPattern_); + std::vector candidatesOut; + + const auto& vocab = planner_._qec->getIndex().getVocab(); + auto config = pathQuery.toPathSearchConfiguration(vocab); + + for (auto& sub : candidatesIn) { + auto pathSearch = + 
std::make_shared(qec_, std::move(sub._qet), config); + auto plan = makeSubtreePlan(std::move(pathSearch)); + candidatesOut.push_back(std::move(plan)); + } + visitGroupOptionalOrMinus(std::move(candidatesOut)); +} + // _______________________________________________________________ void QueryPlanner::GraphPatternPlanner::visitUnion(parsedQuery::Union& arg) { // TODO here we could keep all the candidates, and create a diff --git a/src/engine/QueryPlanner.h b/src/engine/QueryPlanner.h index 360a40d556..8d3a13cd3b 100644 --- a/src/engine/QueryPlanner.h +++ b/src/engine/QueryPlanner.h @@ -344,6 +344,10 @@ class QueryPlanner { const SubtreePlan& a, const SubtreePlan& b, const std::vector>& jcs); + [[nodiscard]] static std::optional createJoinWithPathSearch( + const SubtreePlan& a, const SubtreePlan& b, + const std::vector>& jcs); + template [[nodiscard]] static std::optional createSubtreeWithService( const SubtreePlan& a, const SubtreePlan& b); @@ -532,6 +536,7 @@ class QueryPlanner { void visitBasicGraphPattern(const parsedQuery::BasicGraphPattern& pattern); void visitBind(const parsedQuery::Bind& bind); void visitTransitivePath(parsedQuery::TransPath& transitivePath); + void visitPathSearch(parsedQuery::PathQuery& config); void visitUnion(parsedQuery::Union& un); void visitSubquery(parsedQuery::Subquery& subquery); diff --git a/src/parser/GraphPatternOperation.cpp b/src/parser/GraphPatternOperation.cpp index bddd21c5e9..efceda159c 100644 --- a/src/parser/GraphPatternOperation.cpp +++ b/src/parser/GraphPatternOperation.cpp @@ -5,9 +5,14 @@ #include "parser/GraphPatternOperation.h" +#include +#include + #include "absl/strings/str_cat.h" #include "absl/strings/str_join.h" #include "parser/ParsedQuery.h" +#include "parser/TripleComponent.h" +#include "util/Exception.h" #include "util/Forward.h" namespace parsedQuery { @@ -67,6 +72,146 @@ void BasicGraphPattern::appendTriples(BasicGraphPattern other) { ad_utility::appendVector(_triples, std::move(other._triples)); } +// 
____________________________________________________________________________ +void PathQuery::addParameter(const SparqlTriple& triple) { + auto simpleTriple = triple.getSimple(); + TripleComponent predicate = simpleTriple.p_; + TripleComponent object = simpleTriple.o_; + + if (!predicate.isIri()) { + throw PathSearchException("Predicates must be IRIs"); + } + + auto getVariable = [](std::string_view parameter, + const TripleComponent& object) { + if (!object.isVariable()) { + throw PathSearchException(absl::StrCat("The value ", object.toString(), + " for parameter '", parameter, + "' has to be a variable")); + } + + return object.getVariable(); + }; + + auto setVariable = [&](std::string_view parameter, + const TripleComponent& object, + std::optional& existingValue) { + auto variable = getVariable(parameter, object); + + if (existingValue.has_value()) { + throw PathSearchException(absl::StrCat( + "The parameter '", parameter, "' has already been set to variable: '", + existingValue.value().toSparql(), "'. 
New variable: '", + object.toString(), "'.")); + } + + existingValue = object.getVariable(); + }; + + std::string predString = predicate.getIri().toStringRepresentation(); + if (predString.ends_with("source>")) { + sources_.push_back(std::move(object)); + } else if (predString.ends_with("target>")) { + targets_.push_back(std::move(object)); + } else if (predString.ends_with("start>")) { + setVariable("start", object, start_); + } else if (predString.ends_with("end>")) { + setVariable("end", object, end_); + } else if (predString.ends_with("pathColumn>")) { + setVariable("pathColumn", object, pathColumn_); + } else if (predString.ends_with("edgeColumn>")) { + setVariable("edgeColumn", object, edgeColumn_); + } else if (predString.ends_with("edgeProperty>")) { + edgeProperties_.push_back(getVariable("edgeProperty", object)); + } else if (predString.ends_with("cartesian>")) { + if (!object.isBool()) { + throw PathSearchException("The parameter 'cartesian' expects a boolean"); + } + cartesian_ = object.getBool(); + } else if (predString.ends_with("algorithm>")) { + if (!object.isIri()) { + throw PathSearchException("The 'algorithm' value has to be an Iri"); + } + auto objString = object.getIri().toStringRepresentation(); + + if (objString.ends_with("allPaths>")) { + algorithm_ = PathSearchAlgorithm::ALL_PATHS; + } else { + throw PathSearchException( + "Unsupported algorithm in pathSearch: " + objString + + ". Supported Algorithms: " + "allPaths."); + } + } else { + throw PathSearchException( + "Unsupported argument " + predString + + " in PathSearch. 
" + "Supported Arguments: source, target, start, end, " + "pathColumn, edgeColumn, " + "edgeProperty, algorithm."); + } +} + +// ____________________________________________________________________________ +std::variant> PathQuery::toSearchSide( + std::vector side, const Index::Vocab& vocab) const { + if (side.size() == 1 && side[0].isVariable()) { + return side[0].getVariable(); + } else { + std::vector sideIds; + for (const auto& comp : side) { + if (comp.isVariable()) { + throw PathSearchException( + "Only one variable is allowed per search side"); + } + auto opt = comp.toValueId(vocab); + if (opt.has_value()) { + sideIds.push_back(opt.value()); + } else { + throw PathSearchException("No vocabulary entry for " + comp.toString()); + } + } + return sideIds; + } +} + +// ____________________________________________________________________________ +void PathQuery::addBasicPattern(const BasicGraphPattern& pattern) { + for (SparqlTriple triple : pattern._triples) { + addParameter(triple); + } +} + +// ____________________________________________________________________________ +void PathQuery::addGraph(const GraphPatternOperation& op) { + if (childGraphPattern_._graphPatterns.empty()) { + auto pattern = std::get(op); + childGraphPattern_ = std::move(pattern._child); + } +} + +// ____________________________________________________________________________ +PathSearchConfiguration PathQuery::toPathSearchConfiguration( + const Index::Vocab& vocab) const { + auto sources = toSearchSide(sources_, vocab); + auto targets = toSearchSide(targets_, vocab); + + if (!start_.has_value()) { + throw PathSearchException("Missing parameter 'start' in path search."); + } else if (!end_.has_value()) { + throw PathSearchException("Missing parameter 'end' in path search."); + } else if (!pathColumn_.has_value()) { + throw PathSearchException("Missing parameter 'pathColumn' in path search."); + } else if (!edgeColumn_.has_value()) { + throw PathSearchException("Missing parameter 
'edgeColumn' in path search."); + } + + return PathSearchConfiguration{ + algorithm_, sources, targets, + start_.value(), end_.value(), pathColumn_.value(), + edgeColumn_.value(), edgeProperties_, cartesian_}; +} + // ____________________________________________________________________________ cppcoro::generator Bind::containedVariables() const { for (const auto* ptr : _expression.containedVariables()) { diff --git a/src/parser/GraphPatternOperation.h b/src/parser/GraphPatternOperation.h index df95f80e89..6367d4e510 100644 --- a/src/parser/GraphPatternOperation.h +++ b/src/parser/GraphPatternOperation.h @@ -8,6 +8,7 @@ #include #include +#include "engine/PathSearch.h" #include "engine/sparqlExpressions/SparqlExpressionPimpl.h" #include "parser/GraphPattern.h" #include "parser/TripleComponent.h" @@ -143,6 +144,91 @@ struct TransPath { GraphPattern _childGraphPattern; }; +class PathSearchException : public std::exception { + std::string message_; + + public: + explicit PathSearchException(const std::string& message) + : message_(message) {} + const char* what() const noexcept override { return message_.data(); } +}; + +// The PathQuery object holds intermediate information for the PathSearch. +// The PathSearchConfiguration requires concrete Ids. The vocabulary from the +// QueryPlanner is needed to translate the TripleComponents to ValueIds. +// Also, the members of the PathQuery have defaults and can be set after +// the object creation, simplifying the parsing process. If a required +// value has not been set during parsing, the method 'toPathSearchConfiguration' +// will throw an exception. +// All the error handling for the PathSearch happens in the PathQuery object. +// Thus, if a PathSearchConfiguration can be constructed, it is valid. 
+struct PathQuery { + std::vector sources_; + std::vector targets_; + std::optional start_; + std::optional end_; + std::optional pathColumn_; + std::optional edgeColumn_; + std::vector edgeProperties_; + PathSearchAlgorithm algorithm_; + + GraphPattern childGraphPattern_; + bool cartesian_ = true; + + /** + * @brief Add a parameter to the PathQuery from the given triple. + * The predicate of the triple determines the parameter name and the object + * of the triple determines the parameter value. The subject is ignored. + * Throws a PathSearchException if an unsupported algorithm is given or if the + * predicate contains an unknown parameter name. + * + * @param triple A SparqlTriple that contains the parameter info + */ + void addParameter(const SparqlTriple& triple); + + /** + * @brief Add the parameters from a BasicGraphPattern to the PathQuery + * + * @param pattern + */ + void addBasicPattern(const BasicGraphPattern& pattern); + + /** + * @brief Add a GraphPatternOperation to the PathQuery. The pattern specifies + * the edges of the graph that is used by the path search + * + * @param childGraphPattern + */ + void addGraph(const GraphPatternOperation& childGraphPattern); + + /** + * @brief Convert the vector of triple components into a SearchSide + * The SeachSide can either be a variable or a list of Ids. + * A PathSearchException is thrown if more than one variable is given. + * + * @param side A vector of TripleComponents, containing either exactly one + * Variable or zero or more ValueIds + * @param vocab A Vocabulary containing the Ids of the TripleComponents. + * The Vocab is only used if the given vector contains IRIs. + */ + std::variant> toSearchSide( + std::vector side, const Index::Vocab& vocab) const; + + /** + * @brief Convert this PathQuery into a PathSearchConfiguration object. + * This method checks if all required parameters are set and converts + * the PathSearch sources and targets into SearchSides. 
+ * A PathSearchException is thrown if required parameters are missing. + * The required parameters are start, end, pathColumn and edgeColumn. + * + * @param vocab A vocab containing the Ids of the IRIs in + * sources_ and targets_ + * @return A valid PathSearchConfiguration + */ + PathSearchConfiguration toPathSearchConfiguration( + const Index::Vocab& vocab) const; +}; + // A SPARQL Bind construct. struct Bind { sparqlExpression::SparqlExpressionPimpl _expression; @@ -159,7 +245,7 @@ struct Bind { // class actually becomes `using GraphPatternOperation = std::variant<...>` using GraphPatternOperationVariant = std::variant; + Values, Service, PathQuery, Minus, GroupGraphPattern>; struct GraphPatternOperation : public GraphPatternOperationVariant, public VisitMixin { diff --git a/src/parser/TripleComponent.h b/src/parser/TripleComponent.h index 85450c9975..fb874fc3c1 100644 --- a/src/parser/TripleComponent.h +++ b/src/parser/TripleComponent.h @@ -137,6 +137,12 @@ class TripleComponent { return std::holds_alternative(_variant); } + [[nodiscard]] bool isBool() const { + return std::holds_alternative(_variant); + } + + bool getBool() const { return std::get(_variant); } + bool isLiteral() const { return std::holds_alternative(_variant); } Literal& getLiteral() { return std::get(_variant); } const Literal& getLiteral() const { return std::get(_variant); } diff --git a/src/parser/sparqlParser/SparqlQleverVisitor.cpp b/src/parser/sparqlParser/SparqlQleverVisitor.cpp index 777f29714d..65de837da2 100644 --- a/src/parser/sparqlParser/SparqlQleverVisitor.cpp +++ b/src/parser/sparqlParser/SparqlQleverVisitor.cpp @@ -24,6 +24,7 @@ #include "engine/sparqlExpressions/RelationalExpressions.h" #include "engine/sparqlExpressions/SampleExpression.h" #include "engine/sparqlExpressions/UuidExpressions.h" +#include "parser/GraphPatternOperation.h" #include "parser/RdfParser.h" #include "parser/SparqlParser.h" #include "parser/TokenizerCtre.h" @@ -708,8 +709,33 @@ GraphPatternOperation 
Visitor::visit(Parser::OptionalGraphPatternContext* ctx) { return GraphPatternOperation{parsedQuery::Optional{std::move(pattern)}}; } +GraphPatternOperation Visitor::visitPathQuery( + Parser::ServiceGraphPatternContext* ctx) { + auto parsePathQuery = [](parsedQuery::PathQuery& pathQuery, + const parsedQuery::GraphPatternOperation& op) { + if (std::holds_alternative(op)) { + pathQuery.addBasicPattern(std::get(op)); + } else if (std::holds_alternative(op)) { + pathQuery.addGraph(op); + } else { + throw parsedQuery::PathSearchException( + "Unsupported element in pathSearch." + "PathQuery may only consist of triples for configuration" + "And a { group graph pattern } specifying edges."); + } + }; + + parsedQuery::GraphPattern graphPattern = visit(ctx->groupGraphPattern()); + parsedQuery::PathQuery pathQuery; + for (const auto& op : graphPattern._graphPatterns) { + parsePathQuery(pathQuery, op); + } + + return pathQuery; +} + // Parsing for the `serviceGraphPattern` rule. -parsedQuery::Service Visitor::visit(Parser::ServiceGraphPatternContext* ctx) { +GraphPatternOperation Visitor::visit(Parser::ServiceGraphPatternContext* ctx) { // Get the IRI and if a variable is specified, report that we do not support // it yet. // @@ -728,6 +754,10 @@ parsedQuery::Service Visitor::visit(Parser::ServiceGraphPatternContext* ctx) { auto serviceIri = TripleComponent::Iri::fromIriref(std::get(varOrIri).iri()); + if (serviceIri.toStringRepresentation() == + "") { + return visitPathQuery(ctx); + } // Parse the body of the SERVICE query. Add the visible variables from the // SERVICE clause to the visible variables so far, but also remember them // separately (with duplicates removed) because we need them in `Service.cpp` @@ -743,9 +773,10 @@ parsedQuery::Service Visitor::visit(Parser::ServiceGraphPatternContext* ctx) { visibleVariablesServiceQuery.begin(), visibleVariablesServiceQuery.end()); // Create suitable `parsedQuery::Service` object and return it. 
- return {std::move(visibleVariablesServiceQuery), std::move(serviceIri), - prologueString_, getOriginalInputForContext(ctx->groupGraphPattern()), - static_cast(ctx->SILENT())}; + return parsedQuery::Service{ + std::move(visibleVariablesServiceQuery), std::move(serviceIri), + prologueString_, getOriginalInputForContext(ctx->groupGraphPattern()), + static_cast(ctx->SILENT())}; } // ____________________________________________________________________________ diff --git a/src/parser/sparqlParser/SparqlQleverVisitor.h b/src/parser/sparqlParser/SparqlQleverVisitor.h index e68087b056..aa6ec1e0c9 100644 --- a/src/parser/sparqlParser/SparqlQleverVisitor.h +++ b/src/parser/sparqlParser/SparqlQleverVisitor.h @@ -254,7 +254,11 @@ class SparqlQleverVisitor { parsedQuery::GraphPatternOperation visit( Parser::GraphGraphPatternContext* ctx); - parsedQuery::Service visit(Parser::ServiceGraphPatternContext* ctx); + parsedQuery::GraphPatternOperation visit( + Parser::ServiceGraphPatternContext* ctx); + + parsedQuery::GraphPatternOperation visitPathQuery( + Parser::ServiceGraphPatternContext* ctx); parsedQuery::GraphPatternOperation visit(Parser::BindContext* ctx); diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index bcbee2b48e..eaf4b037de 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -161,6 +161,8 @@ addLinkAndDiscoverTest(IdTableTest util) addLinkAndDiscoverTest(TransitivePathTest engine) +addLinkAndDiscoverTest(PathSearchTest engine) + addLinkAndDiscoverTest(BatchedPipelineTest) addLinkAndDiscoverTest(TupleHelpersTest) diff --git a/test/PathSearchTest.cpp b/test/PathSearchTest.cpp new file mode 100644 index 0000000000..da8bd31c94 --- /dev/null +++ b/test/PathSearchTest.cpp @@ -0,0 +1,722 @@ +// Copyright 2024, University of Freiburg, +// Chair of Algorithms and Data Structures. 
+// Author: Johannes Herrmann (johannes.r.herrmann(at)gmail.com) + +#include + +#include "engine/PathSearch.h" +#include "engine/QueryExecutionTree.h" +#include "engine/Result.h" +#include "engine/ValuesForTesting.h" +#include "gmock/gmock.h" +#include "util/IdTableHelpers.h" +#include "util/IdTestHelpers.h" +#include "util/IndexTestHelpers.h" + +using ad_utility::testing::getQec; +namespace { +auto V = ad_utility::testing::VocabId; +auto I = ad_utility::testing::IntId; +using Var = Variable; +using Vars = std::vector>; + +} // namespace + +Result performPathSearch(PathSearchConfiguration config, IdTable input, + Vars vars) { + auto qec = getQec(); + auto subtree = ad_utility::makeExecutionTree( + qec, std::move(input), vars); + PathSearch p = PathSearch(qec, std::move(subtree), std::move(config)); + + return p.computeResult(false); +} + +TEST(PathSearchTest, constructor) { + auto qec = getQec(); + Vars vars = {Variable{"?start"}, Variable{"?end"}}; + auto sub = makeIdTableFromVector({}); + sub.setNumColumns(2); + auto subtree = ad_utility::makeExecutionTree( + qec, std::move(sub), vars); + + std::vector sources{V(0)}; + std::vector targets{V(1)}; + PathSearchConfiguration config{PathSearchAlgorithm::ALL_PATHS, + sources, + targets, + Var{"?start"}, + Var{"?end"}, + Var{"?edgeIndex"}, + Var{"?pathIndex"}, + {}}; + PathSearch p = PathSearch(qec, std::move(subtree), config); +} + +TEST(PathSearchTest, emptyGraph) { + auto sub = makeIdTableFromVector({}); + sub.setNumColumns(2); + auto expected = makeIdTableFromVector({}); + expected.setNumColumns(4); + + std::vector sources{V(0)}; + std::vector targets{V(4)}; + Vars vars = {Variable{"?start"}, Variable{"?end"}}; + PathSearchConfiguration config{PathSearchAlgorithm::ALL_PATHS, + sources, + targets, + Var{"?start"}, + Var{"?end"}, + Var{"?edgeIndex"}, + Var{"?pathIndex"}, + {}}; + + auto resultTable = performPathSearch(config, std::move(sub), vars); + ASSERT_THAT(resultTable.idTable(), + 
::testing::UnorderedElementsAreArray(expected)); +} + +/** + * Graph: + * 0 -> 1 -> 2 -> 3 -> 4 + */ +TEST(PathSearchTest, singlePath) { + auto sub = makeIdTableFromVector({{0, 1}, {1, 2}, {2, 3}, {3, 4}}); + auto expected = makeIdTableFromVector({ + {V(0), V(1), I(0), I(0)}, + {V(1), V(2), I(0), I(1)}, + {V(2), V(3), I(0), I(2)}, + {V(3), V(4), I(0), I(3)}, + }); + + std::vector sources{V(0)}; + std::vector targets{V(4)}; + Vars vars = {Variable{"?start"}, Variable{"?end"}}; + PathSearchConfiguration config{PathSearchAlgorithm::ALL_PATHS, + sources, + targets, + Var{"?start"}, + Var{"?end"}, + Var{"?edgeIndex"}, + Var{"?pathIndex"}, + {}}; + + auto resultTable = performPathSearch(config, std::move(sub), vars); + ASSERT_THAT(resultTable.idTable(), + ::testing::UnorderedElementsAreArray(expected)); +} + +TEST(PathSearchTest, singlePathWithProperties) { + auto sub = + makeIdTableFromVector({{0, 1, 10}, {1, 2, 20}, {2, 3, 30}, {3, 4, 40}}); + auto expected = makeIdTableFromVector({ + {V(0), V(1), I(0), I(0), V(10)}, + {V(1), V(2), I(0), I(1), V(20)}, + {V(2), V(3), I(0), I(2), V(30)}, + {V(3), V(4), I(0), I(3), V(40)}, + }); + + std::vector sources{V(0)}; + std::vector targets{V(4)}; + Vars vars = {Variable{"?start"}, Variable{"?end"}, Variable{"?edgeProperty"}}; + PathSearchConfiguration config{PathSearchAlgorithm::ALL_PATHS, + sources, + targets, + Var{"?start"}, + Var{"?end"}, + Var{"?edgeIndex"}, + Var{"?pathIndex"}, + {Var{"?edgeProperty"}}}; + + auto resultTable = performPathSearch(config, std::move(sub), vars); + ASSERT_THAT(resultTable.idTable(), + ::testing::UnorderedElementsAreArray(expected)); +} + +TEST(PathSearchTest, singlePathAllSources) { + auto sub = makeIdTableFromVector({{0, 1}, {1, 2}, {2, 3}, {3, 4}}); + auto expected = makeIdTableFromVector({ + {V(0), V(1), I(0), I(0), V(0)}, + {V(1), V(2), I(0), I(1), V(0)}, + {V(2), V(3), I(0), I(2), V(0)}, + {V(3), V(4), I(0), I(3), V(0)}, + {V(1), V(2), I(1), I(0), V(1)}, + {V(2), V(3), I(1), I(1), V(1)}, + 
{V(3), V(4), I(1), I(2), V(1)}, + {V(2), V(3), I(2), I(0), V(2)}, + {V(3), V(4), I(2), I(1), V(2)}, + {V(3), V(4), I(3), I(0), V(3)}, + }); + + std::vector targets{V(4)}; + Vars vars = {Variable{"?start"}, Variable{"?end"}}; + PathSearchConfiguration config{PathSearchAlgorithm::ALL_PATHS, + Var{"?sources"}, + targets, + Var{"?start"}, + Var{"?end"}, + Var{"?edgeIndex"}, + Var{"?pathIndex"}, + {}}; + + auto resultTable = performPathSearch(config, std::move(sub), vars); + ASSERT_THAT(resultTable.idTable(), + ::testing::UnorderedElementsAreArray(expected)); +} + +TEST(PathSearchTest, singlePathAllTargets) { + auto sub = makeIdTableFromVector({{0, 1}, {1, 2}, {2, 3}, {3, 4}}); + auto expected = makeIdTableFromVector({ + {V(0), V(1), I(0), I(0), V(1)}, + {V(0), V(1), I(1), I(0), V(2)}, + {V(1), V(2), I(1), I(1), V(2)}, + {V(0), V(1), I(2), I(0), V(3)}, + {V(1), V(2), I(2), I(1), V(3)}, + {V(2), V(3), I(2), I(2), V(3)}, + {V(0), V(1), I(3), I(0), V(4)}, + {V(1), V(2), I(3), I(1), V(4)}, + {V(2), V(3), I(3), I(2), V(4)}, + {V(3), V(4), I(3), I(3), V(4)}, + }); + + std::vector sources{V(0)}; + Vars vars = {Variable{"?start"}, Variable{"?end"}}; + PathSearchConfiguration config{PathSearchAlgorithm::ALL_PATHS, + sources, + Var{"?targets"}, + Var{"?start"}, + Var{"?end"}, + Var{"?edgeIndex"}, + Var{"?pathIndex"}, + {}}; + + auto resultTable = performPathSearch(config, std::move(sub), vars); + ASSERT_THAT(resultTable.idTable(), + ::testing::UnorderedElementsAreArray(expected)); +} + +/** + * Graph: + * 0 + * / \ + * 1 < > 3 + * \ / + * > 2 < + */ +TEST(PathSearchTest, twoPathsOneTarget) { + auto sub = makeIdTableFromVector({{0, 1}, {1, 2}, {0, 3}, {3, 2}}); + auto expected = makeIdTableFromVector({ + {V(0), V(3), I(0), I(0)}, + {V(3), V(2), I(0), I(1)}, + {V(0), V(1), I(1), I(0)}, + {V(1), V(2), I(1), I(1)}, + }); + + std::vector sources{V(0)}; + std::vector targets{V(2)}; + Vars vars = {Variable{"?start"}, Variable{"?end"}}; + PathSearchConfiguration 
config{PathSearchAlgorithm::ALL_PATHS, + sources, + targets, + Var{"?start"}, + Var{"?end"}, + Var{"?edgeIndex"}, + Var{"?pathIndex"}, + {}}; + + auto resultTable = performPathSearch(config, std::move(sub), vars); + ASSERT_THAT(resultTable.idTable(), + ::testing::UnorderedElementsAreArray(expected)); +} + +/** + * Graph: + * 0 + * / \ + * 1 < > 3 + * / \ + * 2 < > 4 + */ +TEST(PathSearchTest, twoPathsTwoTargets) { + auto sub = makeIdTableFromVector({{0, 1}, {1, 2}, {0, 3}, {3, 4}}); + auto expected = makeIdTableFromVector({ + {V(0), V(3), I(0), I(0)}, + {V(3), V(4), I(0), I(1)}, + {V(0), V(1), I(1), I(0)}, + {V(1), V(2), I(1), I(1)}, + }); + + std::vector sources{V(0)}; + std::vector targets{V(2), V(4)}; + Vars vars = {Variable{"?start"}, Variable{"?end"}}; + PathSearchConfiguration config{PathSearchAlgorithm::ALL_PATHS, + sources, + targets, + Var{"?start"}, + Var{"?end"}, + Var{"?edgeIndex"}, + Var{"?pathIndex"}, + {}}; + + auto resultTable = performPathSearch(config, std::move(sub), vars); + ASSERT_THAT(resultTable.idTable(), + ::testing::UnorderedElementsAreArray(expected)); +} + +/** + * Graph: + * + * 2<---1 + * \ ^ + * \ | + * > 0 + */ +TEST(PathSearchTest, cycle) { + auto sub = makeIdTableFromVector({{0, 1}, {1, 2}, {2, 0}}); + auto expected = makeIdTableFromVector({ + {V(0), V(1), I(0), I(0), V(1)}, + {V(0), V(1), I(1), I(0), V(2)}, + {V(1), V(2), I(1), I(1), V(2)}, + }); + + std::vector sources{V(0)}; + Vars vars = {Variable{"?start"}, Variable{"?end"}}; + PathSearchConfiguration config{PathSearchAlgorithm::ALL_PATHS, + sources, + Var{"?targets"}, + Var{"?start"}, + Var{"?end"}, + Var{"?edgeIndex"}, + Var{"?pathIndex"}, + {}}; + + auto resultTable = performPathSearch(config, std::move(sub), vars); + ASSERT_THAT(resultTable.idTable(), + ::testing::UnorderedElementsAreArray(expected)); +} + +/** + * Graph: + * + * 2<---1--->3 + * \ ^ / + * \ | / + * > 0 < + */ +TEST(PathSearchTest, twoCycle) { + auto sub = makeIdTableFromVector({{0, 1}, {1, 2}, {2, 0}, {1, 
3}, {3, 0}}); + auto expected = makeIdTableFromVector({{V(0), V(1), I(0), I(0), V(1)}, + {V(0), V(1), I(1), I(0), V(3)}, + {V(1), V(3), I(1), I(1), V(3)}, + {V(0), V(1), I(2), I(0), V(2)}, + {V(1), V(2), I(2), I(1), V(2)}}); + + std::vector sources{V(0)}; + Vars vars = {Variable{"?start"}, Variable{"?end"}}; + PathSearchConfiguration config{PathSearchAlgorithm::ALL_PATHS, + sources, + Var{"?targets"}, + Var{"?start"}, + Var{"?end"}, + Var{"?edgeIndex"}, + Var{"?pathIndex"}, + {}}; + + auto resultTable = performPathSearch(config, std::move(sub), vars); + ASSERT_THAT(resultTable.idTable(), + ::testing::UnorderedElementsAreArray(expected)); +} + +/** + * Graph: + * + * 0 + * / \ + * 1 2 + * \ / \ + * 3 4 + */ +TEST(PathSearchTest, allPaths) { + auto sub = makeIdTableFromVector({{0, 1}, {0, 2}, {1, 3}, {2, 3}, {2, 4}}); + auto expected = makeIdTableFromVector({ + {V(0), V(2), I(0), I(0), V(2)}, + {V(0), V(2), I(1), I(0), V(4)}, + {V(2), V(4), I(1), I(1), V(4)}, + {V(0), V(2), I(2), I(0), V(3)}, + {V(2), V(3), I(2), I(1), V(3)}, + {V(0), V(1), I(3), I(0), V(1)}, + {V(0), V(1), I(4), I(0), V(3)}, + {V(1), V(3), I(4), I(1), V(3)}, + }); + + std::vector sources{V(0)}; + Vars vars = {Variable{"?start"}, Variable{"?end"}}; + PathSearchConfiguration config{PathSearchAlgorithm::ALL_PATHS, + sources, + Var{"?targets"}, + Var{"?start"}, + Var{"?end"}, + Var{"?edgeIndex"}, + Var{"?pathIndex"}, + {}}; + + auto resultTable = performPathSearch(config, std::move(sub), vars); + ASSERT_THAT(resultTable.idTable(), + ::testing::UnorderedElementsAreArray(expected)); +} + +TEST(PathSearchTest, allPathsWithPropertiesSwitched) { + auto sub = makeIdTableFromVector({{0, 1, 10, 11}, + {1, 3, 20, 21}, + {0, 2, 30, 31}, + {2, 3, 40, 41}, + {2, 4, 50, 51}}); + auto expected = makeIdTableFromVector({ + {V(0), V(2), I(0), I(0), V(2), V(31), V(30)}, + {V(0), V(2), I(1), I(0), V(4), V(31), V(30)}, + {V(2), V(4), I(1), I(1), V(4), V(51), V(50)}, + {V(0), V(2), I(2), I(0), V(3), V(31), V(30)}, + {V(2), 
V(3), I(2), I(1), V(3), V(41), V(40)}, + {V(0), V(1), I(3), I(0), V(1), V(11), V(10)}, + {V(0), V(1), I(4), I(0), V(3), V(11), V(10)}, + {V(1), V(3), I(4), I(1), V(3), V(21), V(20)}, + }); + + std::vector sources{V(0)}; + Vars vars = {Variable{"?start"}, Variable{"?end"}, Variable{"?edgeProperty1"}, + Variable{"?edgeProperty2"}}; + PathSearchConfiguration config{ + PathSearchAlgorithm::ALL_PATHS, + sources, + Var{"?targets"}, + Var{"?start"}, + Var{"?end"}, + Var{"?edgeIndex"}, + Var{"?pathIndex"}, + {Var{"?edgeProperty2"}, Var{"?edgeProperty1"}}}; + + auto resultTable = performPathSearch(config, std::move(sub), vars); + ASSERT_THAT(resultTable.idTable(), + ::testing::UnorderedElementsAreArray(expected)); +} + +/** + * Graph: + * + * 0 + * |\ + * | \ + * 1->2->3 + */ +TEST(PathSearchTest, allPathsPartialAllTargets) { + auto sub = makeIdTableFromVector({{0, 1}, {1, 2}, {0, 2}, {2, 3}}); + auto expected = makeIdTableFromVector({ + {V(0), V(2), I(0), I(0), V(2)}, + {V(0), V(2), I(1), I(0), V(3)}, + {V(2), V(3), I(1), I(1), V(3)}, + {V(0), V(1), I(2), I(0), V(1)}, + {V(0), V(1), I(3), I(0), V(2)}, + {V(1), V(2), I(3), I(1), V(2)}, + {V(0), V(1), I(4), I(0), V(3)}, + {V(1), V(2), I(4), I(1), V(3)}, + {V(2), V(3), I(4), I(2), V(3)}, + }); + + std::vector sources{V(0)}; + Vars vars = {Variable{"?start"}, Variable{"?end"}}; + PathSearchConfiguration config{PathSearchAlgorithm::ALL_PATHS, + sources, + Var{"?targets"}, + Var{"?start"}, + Var{"?end"}, + Var{"?edgeIndex"}, + Var{"?pathIndex"}, + {}}; + + auto resultTable = performPathSearch(config, std::move(sub), vars); + ASSERT_THAT(resultTable.idTable(), + ::testing::UnorderedElementsAreArray(expected)); +} + +TEST(PathSearchTest, allPathsPartialAllSources) { + auto sub = makeIdTableFromVector({{0, 1}, {1, 2}, {0, 2}, {2, 3}}); + auto expected = makeIdTableFromVector({ + {V(0), V(2), I(0), I(0), V(0)}, + {V(2), V(3), I(0), I(1), V(0)}, + {V(0), V(1), I(1), I(0), V(0)}, + {V(1), V(2), I(1), I(1), V(0)}, + {V(2), V(3), I(1), 
I(2), V(0)}, + {V(1), V(2), I(2), I(0), V(1)}, + {V(2), V(3), I(2), I(1), V(1)}, + {V(2), V(3), I(3), I(0), V(2)}, + }); + + std::vector targets{V(3)}; + Vars vars = {Variable{"?start"}, Variable{"?end"}}; + PathSearchConfiguration config{PathSearchAlgorithm::ALL_PATHS, + Var{"?sources"}, + targets, + Var{"?start"}, + Var{"?end"}, + Var{"?edgeIndex"}, + Var{"?pathIndex"}, + {}}; + + auto resultTable = performPathSearch(config, std::move(sub), vars); + ASSERT_THAT(resultTable.idTable(), + ::testing::UnorderedElementsAreArray(expected)); +} + +/** + * Graph: + * 0 -> 1 -> 2 -> 3 -> 4 + * ^ + * / + * 5 + */ +TEST(PathSearchTest, singlePathWithIrrelevantNode) { + auto sub = makeIdTableFromVector({{0, 1}, {1, 2}, {2, 3}, {3, 4}, {5, 4}}); + auto expected = makeIdTableFromVector({ + {V(0), V(1), I(0), I(0)}, + {V(1), V(2), I(0), I(1)}, + {V(2), V(3), I(0), I(2)}, + {V(3), V(4), I(0), I(3)}, + }); + + std::vector sources{V(0)}; + std::vector targets{V(4)}; + Vars vars = {Variable{"?start"}, Variable{"?end"}}; + PathSearchConfiguration config{PathSearchAlgorithm::ALL_PATHS, + sources, + targets, + Var{"?start"}, + Var{"?end"}, + Var{"?edgeIndex"}, + Var{"?pathIndex"}, + {}}; + + auto resultTable = performPathSearch(config, std::move(sub), vars); + ASSERT_THAT(resultTable.idTable(), + ::testing::UnorderedElementsAreArray(expected)); +} + +/** + * Graph: + * 0 + * | + * 1 + * / \ + * 2 3 + * \ / + * 4 + * | + * 5 + */ +TEST(PathSearchTest, elongatedDiamond) { + auto sub = + makeIdTableFromVector({{0, 1}, {1, 2}, {1, 3}, {2, 4}, {3, 4}, {4, 5}}); + auto expected = makeIdTableFromVector({ + {V(0), V(1), I(0), I(0)}, + {V(1), V(3), I(0), I(1)}, + {V(3), V(4), I(0), I(2)}, + {V(4), V(5), I(0), I(3)}, + {V(0), V(1), I(1), I(0)}, + {V(1), V(2), I(1), I(1)}, + {V(2), V(4), I(1), I(2)}, + {V(4), V(5), I(1), I(3)}, + }); + + std::vector sources{V(0)}; + std::vector targets{V(5)}; + Vars vars = {Variable{"?start"}, Variable{"?end"}}; + PathSearchConfiguration 
config{PathSearchAlgorithm::ALL_PATHS, + sources, + targets, + Var{"?start"}, + Var{"?end"}, + Var{"?edgeIndex"}, + Var{"?pathIndex"}, + {}}; + + auto resultTable = performPathSearch(config, std::move(sub), vars); + ASSERT_THAT(resultTable.idTable(), + ::testing::UnorderedElementsAreArray(expected)); +} + +/** + * Graph: + * 0 4 + * \ / + * 2-->3 + * / \ + * 1 5 + */ +TEST(PathSearchTest, multiSourceMultiTargetallPaths) { + auto sub = makeIdTableFromVector({{0, 2}, {1, 2}, {2, 3}, {3, 4}, {3, 5}}); + auto expected = makeIdTableFromVector({ + {V(0), V(2), I(0), I(0)}, + {V(2), V(3), I(0), I(1)}, + {V(3), V(5), I(0), I(2)}, + {V(0), V(2), I(1), I(0)}, + {V(2), V(3), I(1), I(1)}, + {V(3), V(4), I(1), I(2)}, + {V(1), V(2), I(2), I(0)}, + {V(2), V(3), I(2), I(1)}, + {V(3), V(5), I(2), I(2)}, + {V(1), V(2), I(3), I(0)}, + {V(2), V(3), I(3), I(1)}, + {V(3), V(4), I(3), I(2)}, + }); + + std::vector sources{V(0), V(1)}; + std::vector targets{V(4), V(5)}; + Vars vars = {Variable{"?start"}, Variable{"?end"}}; + PathSearchConfiguration config{PathSearchAlgorithm::ALL_PATHS, + sources, + targets, + Var{"?start"}, + Var{"?end"}, + Var{"?edgeIndex"}, + Var{"?pathIndex"}, + {}}; + + auto resultTable = performPathSearch(config, std::move(sub), vars); + ASSERT_THAT(resultTable.idTable(), + ::testing::UnorderedElementsAreArray(expected)); +} + +TEST(PathSearchTest, multiSourceMultiTargetallPathsNotCartesian) { + auto sub = makeIdTableFromVector({{0, 2}, {1, 2}, {2, 3}, {3, 4}, {3, 5}}); + auto expected = makeIdTableFromVector({ + {V(0), V(2), I(0), I(0)}, + {V(2), V(3), I(0), I(1)}, + {V(3), V(4), I(0), I(2)}, + {V(1), V(2), I(1), I(0)}, + {V(2), V(3), I(1), I(1)}, + {V(3), V(5), I(1), I(2)}, + }); + + std::vector sources{V(0), V(1)}; + std::vector targets{V(4), V(5)}; + Vars vars = {Variable{"?start"}, Variable{"?end"}}; + PathSearchConfiguration config{PathSearchAlgorithm::ALL_PATHS, + sources, + targets, + Var{"?start"}, + Var{"?end"}, + Var{"?edgeIndex"}, + Var{"?pathIndex"}, + 
{}, + false}; + + auto resultTable = performPathSearch(config, std::move(sub), vars); + ASSERT_THAT(resultTable.idTable(), + ::testing::UnorderedElementsAreArray(expected)); +} + +TEST(PathSearchTest, sourceBound) { + auto sub = makeIdTableFromVector({{0, 1}, {1, 2}, {2, 3}, {3, 4}}); + auto sourceTable = makeIdTableFromVector({{0}}); + auto expected = makeIdTableFromVector({ + {V(0), V(1), I(0), I(0), V(0)}, + {V(1), V(2), I(0), I(1), V(0)}, + {V(2), V(3), I(0), I(2), V(0)}, + {V(3), V(4), I(0), I(3), V(0)}, + }); + + std::vector targets{V(4)}; + Vars vars = {Variable{"?start"}, Variable{"?end"}}; + PathSearchConfiguration config{PathSearchAlgorithm::ALL_PATHS, + Var{"?source"}, + targets, + Var{"?start"}, + Var{"?end"}, + Var{"?edgeIndex"}, + Var{"?pathIndex"}, + {}}; + + auto qec = getQec(); + auto subtree = ad_utility::makeExecutionTree( + qec, std::move(sub), vars); + auto pathSearch = PathSearch(qec, std::move(subtree), std::move(config)); + + Vars sourceTreeVars = {Var{"?source"}}; + auto sourceTree = ad_utility::makeExecutionTree( + qec, std::move(sourceTable), sourceTreeVars); + pathSearch.bindSourceSide(sourceTree, 0); + + auto resultTable = pathSearch.computeResult(false); + ASSERT_THAT(resultTable.idTable(), + ::testing::UnorderedElementsAreArray(expected)); +} + +TEST(PathSearchTest, targetBound) { + auto sub = makeIdTableFromVector({{0, 1}, {1, 2}, {2, 3}, {3, 4}}); + auto targetTable = makeIdTableFromVector({{4}}); + auto expected = makeIdTableFromVector({ + {V(0), V(1), I(0), I(0), V(4)}, + {V(1), V(2), I(0), I(1), V(4)}, + {V(2), V(3), I(0), I(2), V(4)}, + {V(3), V(4), I(0), I(3), V(4)}, + }); + + std::vector sources{V(0)}; + Vars vars = {Variable{"?start"}, Variable{"?end"}}; + PathSearchConfiguration config{PathSearchAlgorithm::ALL_PATHS, + sources, + Var{"?target"}, + Var{"?start"}, + Var{"?end"}, + Var{"?edgeIndex"}, + Var{"?pathIndex"}, + {}}; + + auto qec = getQec(); + auto subtree = ad_utility::makeExecutionTree( + qec, std::move(sub), 
vars); + auto pathSearch = PathSearch(qec, std::move(subtree), std::move(config)); + + Vars targetTreeVars = {Var{"?target"}}; + auto targetTree = ad_utility::makeExecutionTree( + qec, std::move(targetTable), targetTreeVars); + pathSearch.bindTargetSide(targetTree, 0); + + auto resultTable = pathSearch.computeResult(false); + ASSERT_THAT(resultTable.idTable(), + ::testing::UnorderedElementsAreArray(expected)); +} + +TEST(PathSearchTest, sourceAndTargetBound) { + auto sub = makeIdTableFromVector({{0, 1}, {1, 2}, {2, 3}, {3, 4}}); + auto sideTable = makeIdTableFromVector({{0, 4}}); + auto expected = makeIdTableFromVector({ + {V(0), V(1), I(0), I(0), V(0), V(4)}, + {V(1), V(2), I(0), I(1), V(0), V(4)}, + {V(2), V(3), I(0), I(2), V(0), V(4)}, + {V(3), V(4), I(0), I(3), V(0), V(4)}, + }); + + Vars vars = {Variable{"?start"}, Variable{"?end"}}; + PathSearchConfiguration config{PathSearchAlgorithm::ALL_PATHS, + Var{"?source"}, + Var{"?target"}, + Var{"?start"}, + Var{"?end"}, + Var{"?edgeIndex"}, + Var{"?pathIndex"}, + {}}; + + auto qec = getQec(); + auto subtree = ad_utility::makeExecutionTree( + qec, std::move(sub), vars); + auto pathSearch = PathSearch(qec, std::move(subtree), std::move(config)); + + Vars sideTreeVars = {Var{"?source"}, Var{"?target"}}; + auto sideTree = ad_utility::makeExecutionTree( + qec, std::move(sideTable), sideTreeVars); + pathSearch.bindSourceAndTargetSide(sideTree, 0, 1); + + auto resultTable = pathSearch.computeResult(false); + ASSERT_THAT(resultTable.idTable(), + ::testing::UnorderedElementsAreArray(expected)); +} diff --git a/test/QueryPlannerTest.cpp b/test/QueryPlannerTest.cpp index 955ebc88ee..5db2454f1a 100644 --- a/test/QueryPlannerTest.cpp +++ b/test/QueryPlannerTest.cpp @@ -7,7 +7,9 @@ #include "QueryPlannerTestHelpers.h" #include "engine/QueryPlanner.h" #include "engine/SpatialJoin.h" +#include "parser/GraphPatternOperation.h" #include "parser/SparqlParser.h" +#include "parser/data/Variable.h" #include 
"util/TripleComponentTestHelpers.h" namespace h = queryPlannerTestHelpers; @@ -783,6 +785,704 @@ TEST(QueryPlanner, TransitivePathBindRight) { ad_utility::testing::getQec("

.

")); } +TEST(QueryPlanner, PathSearchSingleTarget) { + auto scan = h::IndexScanFromStrings; + auto qec = ad_utility::testing::getQec("

.

"); + auto getId = ad_utility::testing::makeGetId(qec->getIndex()); + + std::vector sources{getId("")}; + std::vector targets{getId("")}; + PathSearchConfiguration config{PathSearchAlgorithm::ALL_PATHS, + sources, + targets, + Variable("?start"), + Variable("?end"), + Variable("?path"), + Variable("?edge"), + {}}; + h::expect( + "PREFIX pathSearch: " + "SELECT ?start ?end ?path ?edge WHERE {" + "SERVICE pathSearch: {" + "_:path pathSearch:algorithm pathSearch:allPaths ;" + "pathSearch:source ;" + "pathSearch:target ;" + "pathSearch:pathColumn ?path ;" + "pathSearch:edgeColumn ?edge ;" + "pathSearch:start ?start;" + "pathSearch:end ?end;" + "{SELECT * WHERE {" + "?start

?end." + "}}}}", + h::PathSearch(config, true, true, scan("?start", "

", "?end")), qec); +} + +TEST(QueryPlanner, PathSearchMultipleTargets) { + auto scan = h::IndexScanFromStrings; + auto qec = ad_utility::testing::getQec("

.

"); + auto getId = ad_utility::testing::makeGetId(qec->getIndex()); + + std::vector sources{getId("")}; + std::vector targets{getId(""), getId("")}; + PathSearchConfiguration config{PathSearchAlgorithm::ALL_PATHS, + sources, + targets, + Variable("?start"), + Variable("?end"), + Variable("?path"), + Variable("?edge"), + {}}; + h::expect( + "PREFIX pathSearch: " + "SELECT ?start ?end ?path ?edge WHERE {" + "SERVICE pathSearch: {" + "_:path pathSearch:algorithm pathSearch:allPaths ;" + "pathSearch:source ;" + "pathSearch:target ;" + "pathSearch:target ;" + "pathSearch:pathColumn ?path ;" + "pathSearch:edgeColumn ?edge ;" + "pathSearch:start ?start;" + "pathSearch:end ?end;" + "{SELECT * WHERE {" + "?start

?end." + "}}}}", + h::PathSearch(config, true, true, scan("?start", "

", "?end")), qec); +} + +TEST(QueryPlanner, PathSearchMultipleSourcesAndTargets) { + auto scan = h::IndexScanFromStrings; + auto qec = + ad_utility::testing::getQec("

.

.

"); + auto getId = ad_utility::testing::makeGetId(qec->getIndex()); + + std::vector sources{getId(""), getId("")}; + std::vector targets{getId(""), getId("")}; + PathSearchConfiguration config{PathSearchAlgorithm::ALL_PATHS, + sources, + targets, + Variable("?start"), + Variable("?end"), + Variable("?path"), + Variable("?edge"), + {}}; + h::expect( + "PREFIX pathSearch: " + "SELECT ?start ?end ?path ?edge WHERE {" + "SERVICE pathSearch: {" + "_:path pathSearch:algorithm pathSearch:allPaths ;" + "pathSearch:source ;" + "pathSearch:source ;" + "pathSearch:target ;" + "pathSearch:target ;" + "pathSearch:pathColumn ?path ;" + "pathSearch:edgeColumn ?edge ;" + "pathSearch:start ?start;" + "pathSearch:end ?end;" + "{SELECT * WHERE {" + "?start

?end." + "}}}}", + h::PathSearch(config, true, true, scan("?start", "

", "?end")), qec); +} + +TEST(QueryPlanner, PathSearchMultipleSourcesAndTargetsCartesian) { + auto scan = h::IndexScanFromStrings; + auto qec = + ad_utility::testing::getQec("

.

.

"); + auto getId = ad_utility::testing::makeGetId(qec->getIndex()); + + std::vector sources{getId(""), getId("")}; + std::vector targets{getId(""), getId("")}; + PathSearchConfiguration config{PathSearchAlgorithm::ALL_PATHS, + sources, + targets, + Variable("?start"), + Variable("?end"), + Variable("?path"), + Variable("?edge"), + {}}; + h::expect( + "PREFIX pathSearch: " + "SELECT ?start ?end ?path ?edge WHERE {" + "SERVICE pathSearch: {" + "_:path pathSearch:algorithm pathSearch:allPaths ;" + "pathSearch:source ;" + "pathSearch:source ;" + "pathSearch:target ;" + "pathSearch:target ;" + "pathSearch:pathColumn ?path ;" + "pathSearch:edgeColumn ?edge ;" + "pathSearch:start ?start;" + "pathSearch:end ?end;" + "pathSearch:cartesian true;" + "{SELECT * WHERE {" + "?start

?end." + "}}}}", + h::PathSearch(config, true, true, scan("?start", "

", "?end")), qec); +} +TEST(QueryPlanner, PathSearchMultipleSourcesAndTargetsNonCartesian) { + auto scan = h::IndexScanFromStrings; + auto qec = + ad_utility::testing::getQec("

.

.

"); + auto getId = ad_utility::testing::makeGetId(qec->getIndex()); + + std::vector sources{getId(""), getId("")}; + std::vector targets{getId(""), getId("")}; + PathSearchConfiguration config{PathSearchAlgorithm::ALL_PATHS, + sources, + targets, + Variable("?start"), + Variable("?end"), + Variable("?path"), + Variable("?edge"), + {}, + false}; + h::expect( + "PREFIX pathSearch: " + "SELECT ?start ?end ?path ?edge WHERE {" + "SERVICE pathSearch: {" + "_:path pathSearch:algorithm pathSearch:allPaths ;" + "pathSearch:source ;" + "pathSearch:source ;" + "pathSearch:target ;" + "pathSearch:target ;" + "pathSearch:pathColumn ?path ;" + "pathSearch:edgeColumn ?edge ;" + "pathSearch:start ?start;" + "pathSearch:end ?end;" + "pathSearch:cartesian false;" + "{SELECT * WHERE {" + "?start

?end." + "}}}}", + h::PathSearch(config, true, true, scan("?start", "

", "?end")), qec); +} + +TEST(QueryPlanner, PathSearchWithEdgeProperties) { + auto scan = h::IndexScanFromStrings; + auto join = h::Join; + auto qec = ad_utility::testing::getQec( + " . . . "); + auto getId = ad_utility::testing::makeGetId(qec->getIndex()); + + std::vector sources{getId("")}; + std::vector targets{getId("")}; + PathSearchConfiguration config{PathSearchAlgorithm::ALL_PATHS, + sources, + targets, + Variable("?start"), + Variable("?end"), + Variable("?path"), + Variable("?edge"), + {Variable("?middle")}}; + h::expect( + "PREFIX pathSearch: " + "SELECT ?start ?end ?path ?edge WHERE {" + "SERVICE pathSearch: {" + "_:path pathSearch:algorithm pathSearch:allPaths ;" + "pathSearch:source ;" + "pathSearch:target ;" + "pathSearch:pathColumn ?path ;" + "pathSearch:edgeColumn ?edge ;" + "pathSearch:start ?start;" + "pathSearch:end ?end;" + "pathSearch:edgeProperty ?middle;" + "{SELECT * WHERE {" + "?start ?middle." + "?middle ?end." + "}}}}", + h::PathSearch(config, true, true, + h::Sort(join(scan("?start", "", "?middle"), + scan("?middle", "", "?end")))), + qec); +} + +TEST(QueryPlanner, PathSearchWithMultipleEdgePropertiesAndTargets) { + auto scan = h::IndexScanFromStrings; + auto join = h::UnorderedJoins; + auto qec = ad_utility::testing::getQec( + " ." + " ." + " ." + " ." + " ." 
+ " "); + auto getId = ad_utility::testing::makeGetId(qec->getIndex()); + + std::vector sources{getId("")}; + std::vector targets{getId(""), getId("")}; + PathSearchConfiguration config{ + PathSearchAlgorithm::ALL_PATHS, + sources, + targets, + Variable("?start"), + Variable("?end"), + Variable("?path"), + Variable("?edge"), + {Variable("?middle"), Variable("?middleAttribute")}}; + h::expect( + "PREFIX pathSearch: " + "SELECT ?start ?end ?path ?edge WHERE {" + "SERVICE pathSearch: {" + "_:path pathSearch:algorithm pathSearch:allPaths ;" + "pathSearch:source ;" + "pathSearch:target ;" + "pathSearch:target ;" + "pathSearch:pathColumn ?path ;" + "pathSearch:edgeColumn ?edge ;" + "pathSearch:start ?start;" + "pathSearch:end ?end;" + "pathSearch:edgeProperty ?middle;" + "pathSearch:edgeProperty ?middleAttribute;" + "{SELECT * WHERE {" + "?start ?middle." + "?middle ?middleAttribute." + "?middle ?end." + "}}}}", + h::PathSearch(config, true, true, + h::Sort(join(scan("?start", "", "?middle"), + scan("?middle", "", "?middleAttribute"), + scan("?middle", "", "?end")))), + qec); +} + +TEST(QueryPlanner, PathSearchJoinOnEdgeProperty) { + auto scan = h::IndexScanFromStrings; + auto join = h::Join; + auto qec = ad_utility::testing::getQec( + " . . . 
"); + auto getId = ad_utility::testing::makeGetId(qec->getIndex()); + + std::vector sources{getId("")}; + std::vector targets{getId("")}; + PathSearchConfiguration config{PathSearchAlgorithm::ALL_PATHS, + sources, + targets, + Variable("?start"), + Variable("?end"), + Variable("?path"), + Variable("?edge"), + {Variable("?middle")}}; + h::expect( + "PREFIX pathSearch: " + "SELECT ?start ?end ?path ?edge WHERE {" + "VALUES ?middle {} " + "SERVICE pathSearch: {" + "_:path pathSearch:algorithm pathSearch:allPaths ;" + "pathSearch:source ;" + "pathSearch:target ;" + "pathSearch:pathColumn ?path ;" + "pathSearch:edgeColumn ?edge ;" + "pathSearch:start ?start;" + "pathSearch:end ?end;" + "pathSearch:edgeProperty ?middle;" + "{SELECT * WHERE {" + "?start ?middle." + "?middle ?end." + "}}}}", + join(h::Sort(h::ValuesClause("VALUES (?middle) { () }")), + h::Sort( + h::PathSearch(config, true, true, + h::Sort(join(scan("?start", "", "?middle"), + scan("?middle", "", "?end")))))), + qec); +} + +TEST(QueryPlanner, PathSearchSourceBound) { + auto scan = h::IndexScanFromStrings; + auto qec = ad_utility::testing::getQec("

.

"); + auto getId = ad_utility::testing::makeGetId(qec->getIndex()); + + Variable sources{"?source"}; + std::vector targets{getId("")}; + PathSearchConfiguration config{PathSearchAlgorithm::ALL_PATHS, + sources, + targets, + Variable("?start"), + Variable("?end"), + Variable("?path"), + Variable("?edge"), + {}}; + h::expect( + "PREFIX pathSearch: " + "SELECT ?start ?end ?path ?edge WHERE {" + "VALUES ?source {}" + "SERVICE pathSearch: {" + "_:path pathSearch:algorithm pathSearch:allPaths ;" + "pathSearch:source ?source ;" + "pathSearch:target ;" + "pathSearch:pathColumn ?path ;" + "pathSearch:edgeColumn ?edge ;" + "pathSearch:start ?start;" + "pathSearch:end ?end;" + "{SELECT * WHERE {" + "?start

?end." + "}}}}", + h::PathSearch(config, true, true, scan("?start", "

", "?end"), + h::ValuesClause("VALUES (?source) { () }")), + qec); +} + +TEST(QueryPlanner, PathSearchTargetBound) { + auto scan = h::IndexScanFromStrings; + auto qec = ad_utility::testing::getQec("

.

"); + auto getId = ad_utility::testing::makeGetId(qec->getIndex()); + + std::vector sources{getId("")}; + Variable targets{"?target"}; + PathSearchConfiguration config{PathSearchAlgorithm::ALL_PATHS, + sources, + targets, + Variable("?start"), + Variable("?end"), + Variable("?path"), + Variable("?edge"), + {}}; + h::expect( + "PREFIX pathSearch: " + "SELECT ?start ?end ?path ?edge WHERE {" + "VALUES ?target {}" + "SERVICE pathSearch: {" + "_:path pathSearch:algorithm pathSearch:allPaths ;" + "pathSearch:source ;" + "pathSearch:target ?target ;" + "pathSearch:pathColumn ?path ;" + "pathSearch:edgeColumn ?edge ;" + "pathSearch:start ?start;" + "pathSearch:end ?end;" + "{SELECT * WHERE {" + "?start

?end." + "}}}}", + h::PathSearch(config, true, true, scan("?start", "

", "?end"), + h::ValuesClause("VALUES (?target) { () }")), + qec); +} + +TEST(QueryPlanner, PathSearchBothBound) { + auto scan = h::IndexScanFromStrings; + auto qec = ad_utility::testing::getQec("

.

"); + auto getId = ad_utility::testing::makeGetId(qec->getIndex()); + + Variable sources{"?source"}; + Variable targets{"?target"}; + PathSearchConfiguration config{PathSearchAlgorithm::ALL_PATHS, + sources, + targets, + Variable("?start"), + Variable("?end"), + Variable("?path"), + Variable("?edge"), + {}}; + h::expect( + "PREFIX pathSearch: " + "SELECT ?start ?end ?path ?edge WHERE {" + "VALUES (?source ?target) {( )}" + "SERVICE pathSearch: {" + "_:path pathSearch:algorithm pathSearch:allPaths ;" + "pathSearch:source ?source ;" + "pathSearch:target ?target ;" + "pathSearch:pathColumn ?path ;" + "pathSearch:edgeColumn ?edge ;" + "pathSearch:start ?start;" + "pathSearch:end ?end;" + "{SELECT * WHERE {" + "?start

?end." + "}}}}", + h::PathSearch(config, true, true, scan("?start", "

", "?end"), + h::ValuesClause("VALUES (?source\t?target) { ( ) }")), + qec); +} + +TEST(QueryPlanner, PathSearchBothBoundIndividually) { + auto scan = h::IndexScanFromStrings; + auto qec = ad_utility::testing::getQec("

.

"); + auto getId = ad_utility::testing::makeGetId(qec->getIndex()); + + Variable sources{"?source"}; + Variable targets{"?target"}; + PathSearchConfiguration config{PathSearchAlgorithm::ALL_PATHS, + sources, + targets, + Variable("?start"), + Variable("?end"), + Variable("?path"), + Variable("?edge"), + {}}; + h::expect( + "PREFIX pathSearch: " + "SELECT ?start ?end ?path ?edge WHERE {" + "VALUES (?source) {()}" + "VALUES (?target) {()}" + "SERVICE pathSearch: {" + "_:path pathSearch:algorithm pathSearch:allPaths ;" + "pathSearch:source ?source ;" + "pathSearch:target ?target ;" + "pathSearch:pathColumn ?path ;" + "pathSearch:edgeColumn ?edge ;" + "pathSearch:start ?start;" + "pathSearch:end ?end;" + "{SELECT * WHERE {" + "?start

?end." + "}}}}", + h::PathSearch(config, true, true, scan("?start", "

", "?end"), + h::ValuesClause("VALUES (?source) { () }"), + h::ValuesClause("VALUES (?target) { () }")), + qec); +} + +// __________________________________________________________________________ +TEST(QueryPlanner, PathSearchMissingStart) { + auto qec = ad_utility::testing::getQec("

.

"); + auto getId = ad_utility::testing::makeGetId(qec->getIndex()); + + auto query = + "PREFIX pathSearch: " + "SELECT ?start ?end ?path ?edge WHERE {" + "SERVICE pathSearch: {" + "_:path pathSearch:algorithm pathSearch:allPaths ;" + "pathSearch:source ;" + "pathSearch:target ;" + "pathSearch:pathColumn ?path ;" + "pathSearch:edgeColumn ?edge ;" + "pathSearch:end ?end;" + "{SELECT * WHERE {" + "?start

?end." + "}}}}"; + AD_EXPECT_THROW_WITH_MESSAGE_AND_TYPE(h::parseAndPlan(std::move(query), qec), + HasSubstr("Missing parameter 'start'"), + parsedQuery::PathSearchException); +} + +// __________________________________________________________________________ +TEST(QueryPlanner, PathSearchMultipleStarts) { + auto qec = ad_utility::testing::getQec("

.

"); + auto getId = ad_utility::testing::makeGetId(qec->getIndex()); + + auto query = + "PREFIX pathSearch: " + "SELECT ?start ?end ?path ?edge WHERE {" + "SERVICE pathSearch: {" + "_:path pathSearch:algorithm pathSearch:allPaths ;" + "pathSearch:source ;" + "pathSearch:target ;" + "pathSearch:pathColumn ?path ;" + "pathSearch:edgeColumn ?edge ;" + "pathSearch:start ?start1;" + "pathSearch:start ?start2;" + "pathSearch:end ?end;" + "{SELECT * WHERE {" + "?start

?end." + "}}}}"; + AD_EXPECT_THROW_WITH_MESSAGE_AND_TYPE( + h::parseAndPlan(std::move(query), qec), + HasSubstr("parameter 'start' has already been set " + "to variable: '?start1'. New variable: '?start2'"), + parsedQuery::PathSearchException); +} + +// __________________________________________________________________________ +TEST(QueryPlanner, PathSearchMissingEnd) { + auto qec = ad_utility::testing::getQec("

.

"); + auto getId = ad_utility::testing::makeGetId(qec->getIndex()); + + auto query = + "PREFIX pathSearch: " + "SELECT ?start ?end ?path ?edge WHERE {" + "SERVICE pathSearch: {" + "_:path pathSearch:algorithm pathSearch:allPaths ;" + "pathSearch:source ;" + "pathSearch:target ;" + "pathSearch:pathColumn ?path ;" + "pathSearch:edgeColumn ?edge ;" + "pathSearch:start ?start;" + "{SELECT * WHERE {" + "?start

?end." + "}}}}"; + AD_EXPECT_THROW_WITH_MESSAGE_AND_TYPE(h::parseAndPlan(std::move(query), qec), + HasSubstr("Missing parameter 'end'"), + parsedQuery::PathSearchException); +} + +// __________________________________________________________________________ +TEST(QueryPlanner, PathSearchMultipleEnds) { + auto qec = ad_utility::testing::getQec("

.

"); + auto getId = ad_utility::testing::makeGetId(qec->getIndex()); + + auto query = + "PREFIX pathSearch: " + "SELECT ?start ?end ?path ?edge WHERE {" + "SERVICE pathSearch: {" + "_:path pathSearch:algorithm pathSearch:allPaths ;" + "pathSearch:source ;" + "pathSearch:target ;" + "pathSearch:pathColumn ?path ;" + "pathSearch:edgeColumn ?edge ;" + "pathSearch:start ?start;" + "pathSearch:end ?end1;" + "pathSearch:end ?end2;" + "{SELECT * WHERE {" + "?start

?end." + "}}}}"; + AD_EXPECT_THROW_WITH_MESSAGE_AND_TYPE( + h::parseAndPlan(std::move(query), qec), + HasSubstr("parameter 'end' has already been set " + "to variable: '?end1'. New variable: '?end2'"), + parsedQuery::PathSearchException); +} + +// __________________________________________________________________________ +TEST(QueryPlanner, PathSearchStartNotVariable) { + auto qec = ad_utility::testing::getQec("

.

"); + auto getId = ad_utility::testing::makeGetId(qec->getIndex()); + + auto query = + "PREFIX pathSearch: " + "SELECT ?start ?end ?path ?edge WHERE {" + "SERVICE pathSearch: {" + "_:path pathSearch:algorithm pathSearch:allPaths ;" + "pathSearch:source ;" + "pathSearch:target ;" + "pathSearch:pathColumn ?path ;" + "pathSearch:edgeColumn ?edge ;" + "pathSearch:start ;" + "pathSearch:end ?end;" + "{SELECT * WHERE {" + "?start

?end." + "}}}}"; + AD_EXPECT_THROW_WITH_MESSAGE_AND_TYPE( + h::parseAndPlan(std::move(query), qec), + HasSubstr("The value for parameter 'start'"), + parsedQuery::PathSearchException); +} + +// __________________________________________________________________________ +TEST(QueryPlanner, PathSearchPredicateNotIri) { + auto qec = ad_utility::testing::getQec("

.

"); + auto getId = ad_utility::testing::makeGetId(qec->getIndex()); + + auto query = + "PREFIX pathSearch: " + "SELECT ?start ?end ?path ?edge WHERE {" + "SERVICE pathSearch: {" + "_:path ?algorithm pathSearch:allPaths ;" + "pathSearch:source ;" + "pathSearch:target ;" + "pathSearch:pathColumn ?path ;" + "pathSearch:edgeColumn ?edge ;" + "pathSearch:start ?start;" + "pathSearch:end ?end;" + "{SELECT * WHERE {" + "?start

?end." + "}}}}"; + AD_EXPECT_THROW_WITH_MESSAGE_AND_TYPE(h::parseAndPlan(std::move(query), qec), + HasSubstr("Predicates must be IRIs"), + parsedQuery::PathSearchException); +} + +// __________________________________________________________________________ +TEST(QueryPlanner, PathSearchUnsupportedArgument) { + auto qec = ad_utility::testing::getQec("

.

"); + auto getId = ad_utility::testing::makeGetId(qec->getIndex()); + + auto query = + "PREFIX pathSearch: " + "SELECT ?start ?end ?path ?edge WHERE {" + "SERVICE pathSearch: {" + "_:path pathSearch:algorithm pathSearch:allPaths ;" + " ?error;" + "pathSearch:source ;" + "pathSearch:target ;" + "pathSearch:pathColumn ?path ;" + "pathSearch:edgeColumn ?edge ;" + "pathSearch:start ?start;" + "pathSearch:end ?end;" + "{SELECT * WHERE {" + "?start

?end." + "}}}}"; + AD_EXPECT_THROW_WITH_MESSAGE_AND_TYPE( + h::parseAndPlan(std::move(query), qec), + HasSubstr("Unsupported argument in PathSearch"), + parsedQuery::PathSearchException); +} + +// __________________________________________________________________________ +TEST(QueryPlanner, PathSearchTwoVariablesForSource) { + auto qec = ad_utility::testing::getQec("

.

"); + auto getId = ad_utility::testing::makeGetId(qec->getIndex()); + + auto query = + "PREFIX pathSearch: " + "SELECT ?start ?end ?path ?edge WHERE {" + "SERVICE pathSearch: {" + "_:path pathSearch:algorithm pathSearch:allPaths ;" + "pathSearch:source ?source1 ;" + "pathSearch:source ?source2 ;" + "pathSearch:target ;" + "pathSearch:pathColumn ?path ;" + "pathSearch:edgeColumn ?edge ;" + "pathSearch:start ?start;" + "pathSearch:end ?end;" + "{SELECT * WHERE {" + "?start

?end." + "}}}}"; + AD_EXPECT_THROW_WITH_MESSAGE_AND_TYPE( + h::parseAndPlan(std::move(query), qec), + HasSubstr("Only one variable is allowed per search side"), + parsedQuery::PathSearchException); +} + +// __________________________________________________________________________ +TEST(QueryPlanner, PathSearchUnsupportedElement) { + auto qec = ad_utility::testing::getQec("

.

"); + auto getId = ad_utility::testing::makeGetId(qec->getIndex()); + + auto query = + "PREFIX pathSearch: " + "SELECT ?start ?end ?path ?edge WHERE {" + "SERVICE pathSearch: {" + "_:path pathSearch:algorithm pathSearch:allPaths ;" + "pathSearch:source ?source1 ;" + "pathSearch:source ?source2 ;" + "pathSearch:target ;" + "pathSearch:pathColumn ?path ;" + "pathSearch:edgeColumn ?edge ;" + "pathSearch:start ?start;" + "pathSearch:end ?end;" + "VALUES ?middle {}" + "{SELECT * WHERE {" + "?start

?end." + "}}}}"; + AD_EXPECT_THROW_WITH_MESSAGE_AND_TYPE( + h::parseAndPlan(std::move(query), qec), + HasSubstr("Unsupported element in pathSearch"), + parsedQuery::PathSearchException); +} + +// __________________________________________________________________________ +TEST(QueryPlanner, PathSearchUnsupportedAlgorithm) { + auto qec = ad_utility::testing::getQec("

.

"); + auto getId = ad_utility::testing::makeGetId(qec->getIndex()); + + auto query = + "PREFIX pathSearch: " + "SELECT ?start ?end ?path ?edge WHERE {" + "SERVICE pathSearch: {" + "_:path pathSearch:algorithm pathSearch:shortestPath ;" + "pathSearch:source ?source1 ;" + "pathSearch:source ?source2 ;" + "pathSearch:target ;" + "pathSearch:pathColumn ?path ;" + "pathSearch:edgeColumn ?edge ;" + "pathSearch:start ?start;" + "pathSearch:end ?end;" + "{SELECT * WHERE {" + "?start

?end." + "}}}}"; + AD_EXPECT_THROW_WITH_MESSAGE_AND_TYPE( + h::parseAndPlan(std::move(query), qec), + HasSubstr("Unsupported algorithm in pathSearch"), + parsedQuery::PathSearchException); +} + TEST(QueryPlanner, SpatialJoinViaMaxDistPredicate) { auto scan = h::IndexScanFromStrings; h::expect( diff --git a/test/QueryPlannerTestHelpers.h b/test/QueryPlannerTestHelpers.h index 17622ee5eb..2457284c87 100644 --- a/test/QueryPlannerTestHelpers.h +++ b/test/QueryPlannerTestHelpers.h @@ -8,6 +8,7 @@ #include #include +#include #include "./util/GTestHelpers.h" #include "engine/Bind.h" @@ -22,6 +23,7 @@ #include "engine/NeutralElementOperation.h" #include "engine/OptionalJoin.h" #include "engine/OrderBy.h" +#include "engine/PathSearch.h" #include "engine/QueryExecutionTree.h" #include "engine/QueryPlanner.h" #include "engine/Service.h" @@ -292,6 +294,38 @@ inline auto TransitivePath = TransitivePathSideMatcher(right)))); }; +inline auto PathSearchConfigMatcher = [](PathSearchConfiguration config) { + auto sourceMatcher = + AD_FIELD(PathSearchConfiguration, sources_, Eq(config.sources_)); + auto targetMatcher = + AD_FIELD(PathSearchConfiguration, targets_, Eq(config.targets_)); + return AllOf( + AD_FIELD(PathSearchConfiguration, algorithm_, Eq(config.algorithm_)), + sourceMatcher, targetMatcher, + AD_FIELD(PathSearchConfiguration, start_, Eq(config.start_)), + AD_FIELD(PathSearchConfiguration, end_, Eq(config.end_)), + AD_FIELD(PathSearchConfiguration, pathColumn_, Eq(config.pathColumn_)), + AD_FIELD(PathSearchConfiguration, edgeColumn_, Eq(config.edgeColumn_)), + AD_FIELD(PathSearchConfiguration, edgeProperties_, + UnorderedElementsAreArray(config.edgeProperties_))); +}; + +// Match a PathSearch operation +inline auto PathSearch = + [](PathSearchConfiguration config, bool sourceBound, bool targetBound, + const std::same_as auto&... 
childMatchers) { + return RootOperation<::PathSearch>(AllOf( + children(childMatchers...), + AD_PROPERTY(PathSearch, getConfig, PathSearchConfigMatcher(config)), + AD_PROPERTY(PathSearch, isSourceBound, Eq(sourceBound)), + AD_PROPERTY(PathSearch, isTargetBound, Eq(targetBound)))); + }; + +inline auto ValuesClause = [](string cacheKey) { + return RootOperation<::Values>( + AllOf(AD_PROPERTY(Values, getCacheKey, cacheKey))); +}; + // Match a SpatialJoin operation, set arguments to ignore to -1 inline auto SpatialJoin = [](size_t maxDist, size_t maxResults,