diff --git a/docs/path_search.md b/docs/path_search.md new file mode 100644 index 0000000000..10ae4e0f51 --- /dev/null +++ b/docs/path_search.md @@ -0,0 +1,290 @@ +# Path Search Feature Documentation for SPARQL Engine + +## Overview + +The Path Search feature in this SPARQL engine allows users to perform advanced queries +to find paths between sources and targets in a graph. It supports a variety of configurations, +including single or multiple source and target nodes, optional edge properties, and +custom algorithms for path discovery. This feature is accessed using the `SERVICE` keyword +and the service IRI ``. + +## Basic Syntax + +The general structure of a Path Search query is as follows: + +```sparql +PREFIX pathSearch: + +SELECT ?start ?end ?path ?edge WHERE { + SERVICE pathSearch: { + _:path pathSearch:algorithm pathSearch:allPaths ; # Specify the algorithm + pathSearch:source ; # Specify the source node(s) + pathSearch:target ; # Specify the target node(s) + pathSearch:pathColumn ?path ; # Bind the path variable + pathSearch:edgeColumn ?edge ; # Bind the edge variable + pathSearch:start ?start ; # Bind the edge start variable + pathSearch:end ?end ; # Bind the edge end variable + {SELECT * WHERE { + ?start ?end. # Define the edge pattern + }} + } +} +``` + +### Parameters + +- **pathSearch:algorithm**: Defines the algorithm used to search paths. Currently, only `pathSearch:allPaths` is supported. +- **pathSearch:source**: Defines the source node(s) of the search. +- **pathSearch:target** (optional): Defines the target node(s) of the search. +- **pathSearch:pathColumn**: Defines the variable for the path. +- **pathSearch:edgeColumn**: Defines the variable for the edge. +- **pathSearch:start**: Defines the variable for the start of the edges. +- **pathSearch:end**: Defines the variable for the end of the edges. +- **pathSearch:edgeProperty** (optional): Specifies properties for the edges in the path. 
+- **pathSearch:cartesian** (optional): Controls the behaviour of path searches between + source and target nodes. Expects a boolean. The default is `true`. + - If set to `true`, the search will compute the paths from each source to **all targets** + - If set to `false`, the search will compute the paths from each source to exactly + **one target**. Sources and targets are paired based on their index (i.e. the paths + from the first source to the first target are searched, then the second source and + target, and so on). + + +### Example 1: Single Source and Target + +The simplest case is searching for paths between a single source and a single target: + +```sparql +PREFIX pathSearch: + +SELECT ?start ?end ?path ?edge WHERE { + SERVICE pathSearch: { + _:path pathSearch:algorithm pathSearch:allPaths ; + pathSearch:source ; + pathSearch:target ; + pathSearch:pathColumn ?path ; + pathSearch:edgeColumn ?edge ; + pathSearch:start ?start ; + pathSearch:end ?end ; + { + SELECT * WHERE { + ?start ?end. + } + } + } +} +``` + +### Example 2: Multiple Sources or Targets + +It is possible to specify a set of sources or targets for the path search. + +```sparql +PREFIX pathSearch: + +SELECT ?start ?end ?path ?edge WHERE { + SERVICE pathSearch: { + _:path pathSearch:algorithm pathSearch:allPaths ; + pathSearch:source ; + pathSearch:source ; + pathSearch:target ; + pathSearch:target ; + pathSearch:pathColumn ?path ; + pathSearch:edgeColumn ?edge ; + pathSearch:start ?start ; + pathSearch:end ?end ; + { + SELECT * WHERE { + ?start ?end. + } + } + } +} +``` + +This query will search for all paths between all sources and all targets, i.e. +- (``, ``) +- (``, ``) +- (``, ``) +- (``, ``) + +It is possible to specify whether the sources and targets should be combined according +to the cartesian product (as seen above) or if they should be matched up pairwise, i.e. +- (``, ``) +- (``, ``) + +This can be done with the parameter `pathSearch:cartesian`. This parameter expects a +boolean. 
If set to `true`, then the cartesian product is used to match the sources with +the targets. +If set to `false`, then the sources and targets are matched pairwise. If left +unspecified, then the default `true` is used. + +```sparql +PREFIX pathSearch: + +SELECT ?start ?end ?path ?edge WHERE { + SERVICE pathSearch: { + _:path pathSearch:algorithm pathSearch:allPaths ; + pathSearch:source ; + pathSearch:source ; + pathSearch:target ; + pathSearch:target ; + pathSearch:pathColumn ?path ; + pathSearch:edgeColumn ?edge ; + pathSearch:start ?start ; + pathSearch:end ?end ; + pathSearch:cartesian false; + { + SELECT * WHERE { + ?start ?end. + } + } + } +} +``` + +### Example 3: Edge Properties + +You can also include edge properties in the path search to further refine the results: + +```sparql +PREFIX pathSearch: + +SELECT ?start ?end ?path ?edge WHERE { + SERVICE pathSearch: { + _:path pathSearch:algorithm pathSearch:allPaths ; + pathSearch:source ; + pathSearch:target ; + pathSearch:pathColumn ?path ; + pathSearch:edgeColumn ?edge ; + pathSearch:edgeProperty ?middle ; + pathSearch:start ?start ; + pathSearch:end ?end ; + { + SELECT * WHERE { + ?start ?middle. + ?middle ?end. + } + } + } +} +``` + +This is especially useful for [N-ary relations](https://www.w3.org/TR/swbp-n-aryRelations/). +Considering the example above, it is possible to query additional relations of `?middle`: + +```sparql +PREFIX pathSearch: + +SELECT ?start ?end ?path ?edge WHERE { + SERVICE pathSearch: { + _:path pathSearch:algorithm pathSearch:allPaths ; + pathSearch:source ; + pathSearch:target ; + pathSearch:pathColumn ?path ; + pathSearch:edgeColumn ?edge ; + pathSearch:edgeProperty ?middle ; + pathSearch:edgeProperty ?edgeInfo ; + pathSearch:start ?start ; + pathSearch:end ?end ; + { + SELECT * WHERE { + ?start ?middle. + ?middle ?end. + ?middle ?edgeInfo. 
+ } + } + } +} +``` + +This makes it possible to query additional properties of the edge between `?start` and `?end` (such as `?edgeInfo` in the example above). + + +### Example 4: Source or Target as Variables + +You can also bind the source and/or target dynamically using variables. The examples +below use `VALUES` clauses, which can be convenient to specify sources and targets. +However, the source/target variables can also be bound using any regular SPARQL construct. + +#### Source Variable + +```sparql +PREFIX pathSearch: + +SELECT ?start ?end ?path ?edge WHERE { + VALUES ?source {} + SERVICE pathSearch: { + _:path pathSearch:algorithm pathSearch:allPaths ; + pathSearch:source ?source ; + pathSearch:target ; + pathSearch:pathColumn ?path ; + pathSearch:edgeColumn ?edge ; + pathSearch:start ?start ; + pathSearch:end ?end ; + { + SELECT * WHERE { + ?start

?end. + } + } + } +} +``` + +#### Target Variable + +```sparql +PREFIX pathSearch: + +SELECT ?start ?end ?path ?edge WHERE { + VALUES ?target {} + SERVICE pathSearch: { + _:path pathSearch:algorithm pathSearch:allPaths ; + pathSearch:source ; + pathSearch:target ?target ; + pathSearch:pathColumn ?path ; + pathSearch:edgeColumn ?edge ; + pathSearch:start ?start ; + pathSearch:end ?end ; + { + SELECT * WHERE { + ?start

?end. + } + } + } +} +``` + +## Error Handling + +The Path Search feature will throw errors in the following scenarios: + +- **Missing Start Parameter**: If the `start` parameter is not specified, an error will be raised. +- **Multiple Start or End Variables**: If multiple `start` or `end` variables are defined, an error is raised. +- **Invalid Non-Variable Start/End**: If the `start` or `end` parameter is not bound to a variable, the query will fail. +- **Unsupported Argument**: Arguments other than those listed (like custom user arguments) will cause an error. +- **Non-IRI Predicate**: Predicates must be IRIs. If not, an error will occur. + +### Example: Missing Start Parameter + +```sparql +PREFIX pathSearch: +SELECT ?start ?end ?path ?edge WHERE { + SERVICE pathSearch: { + _:path pathSearch:algorithm pathSearch:allPaths ; + pathSearch:source ; + pathSearch:target ; + pathSearch:pathColumn ?path ; + pathSearch:edgeColumn ?edge ; + pathSearch:end ?end ; # Missing start + { + SELECT * WHERE { + ?start

?end. + } + } + } +} +``` + +This query would fail with a "Missing parameter 'start'" error. + diff --git a/e2e/scientists_queries.yaml b/e2e/scientists_queries.yaml index a945eeb574..1fc78430be 100644 --- a/e2e/scientists_queries.yaml +++ b/e2e/scientists_queries.yaml @@ -1017,6 +1017,34 @@ queries: - contains_row: [""] - contains_row: ["1.87"] + - query: path_search_all_paths + type: no-text + sparql: | + PREFIX pathSearch: + SELECT * WHERE { + SERVICE pathSearch: { + pathSearch: pathSearch:algorithm pathSearch:allPaths; + pathSearch:source ; + pathSearch:target ; + pathSearch:pathColumn ?path; + pathSearch:edgeColumn ?edge; + pathSearch:start ?start; + pathSearch:end ?end; + {SELECT * WHERE { + ?start ?end + }} + } + } + checks: + - num_rows: 17 + - num_cols: 4 + - selected: ["?path", "?edge", "?start", "?end"] + - contains_row: ["0", "0", "", ""] + - contains_row: ["0", "1", "", ""] + - contains_row: ["0", "2", "", ""] + - contains_row: ["4", "0", "", ""] + - contains_row: ["4", "1", "", ""] + - query : property_path_inverse type: no-text diff --git a/src/engine/CMakeLists.txt b/src/engine/CMakeLists.txt index 0f37137531..41a9a33a68 100644 --- a/src/engine/CMakeLists.txt +++ b/src/engine/CMakeLists.txt @@ -13,5 +13,5 @@ add_library(engine VariableToColumnMap.cpp ExportQueryExecutionTrees.cpp CartesianProductJoin.cpp TextIndexScanForWord.cpp TextIndexScanForEntity.cpp TextLimit.cpp LazyGroupBy.cpp GroupByHashMapOptimization.cpp SpatialJoin.cpp - CountConnectedSubgraphs.cpp SpatialJoinAlgorithms.cpp) + CountConnectedSubgraphs.cpp SpatialJoinAlgorithms.cpp PathSearch.cpp) qlever_target_link_libraries(engine util index parser sparqlExpressions http SortPerformanceEstimator Boost::iostreams s2) diff --git a/src/engine/CheckUsePatternTrick.cpp b/src/engine/CheckUsePatternTrick.cpp index fd153adc0d..9bfe8c4734 100644 --- a/src/engine/CheckUsePatternTrick.cpp +++ b/src/engine/CheckUsePatternTrick.cpp @@ -69,7 +69,8 @@ bool isVariableContainedInGraphPatternOperation( } 
else if constexpr (std::is_same_v) { return ad_utility::contains(arg.visibleVariables_, variable); } else { - static_assert(std::is_same_v); + static_assert(std::is_same_v || + std::is_same_v); // The `TransPath` is set up later in the query planning, when this // function should not be called anymore. AD_FAIL(); diff --git a/src/engine/PathSearch.cpp b/src/engine/PathSearch.cpp new file mode 100644 index 0000000000..50f10210a6 --- /dev/null +++ b/src/engine/PathSearch.cpp @@ -0,0 +1,460 @@ +// Copyright 2024, University of Freiburg, +// Chair of Algorithms and Data Structures. +// Author: Johannes Herrmann (johannes.r.herrmann(at)gmail.com) + +#include "PathSearch.h" + +#include +#include +#include +#include +#include +#include +#include + +#include "engine/CallFixedSize.h" +#include "engine/QueryExecutionTree.h" +#include "engine/VariableToColumnMap.h" +#include "util/AllocatorWithLimit.h" + +using namespace pathSearch; + +// _____________________________________________________________________________ +BinSearchWrapper::BinSearchWrapper(const IdTable& table, size_t startCol, + size_t endCol, std::vector edgeCols) + : table_(table), + startCol_(startCol), + endCol_(endCol), + edgeCols_(std::move(edgeCols)) {} + +// _____________________________________________________________________________ +std::vector BinSearchWrapper::outgoingEdes(const Id node) const { + auto startIds = table_.getColumn(startCol_); + auto range = std::ranges::equal_range(startIds, node); + auto startIndex = std::distance(startIds.begin(), range.begin()); + + std::vector edges; + for (size_t i = 0; i < range.size(); i++) { + auto row = startIndex + i; + auto edge = makeEdgeFromRow(row); + edges.push_back(edge); + } + return edges; +} + +// _____________________________________________________________________________ +std::vector BinSearchWrapper::getSources() const { + auto startIds = table_.getColumn(startCol_); + std::vector sources; + std::ranges::unique_copy(startIds, 
std::back_inserter(sources)); + + return sources; +} + +// _____________________________________________________________________________ +std::vector BinSearchWrapper::getEdgeProperties(const Edge& edge) const { + std::vector edgeProperties; + for (auto edgeCol : edgeCols_) { + edgeProperties.push_back(table_(edge.edgeRow_, edgeCol)); + } + return edgeProperties; +} + +// _____________________________________________________________________________ +Edge BinSearchWrapper::makeEdgeFromRow(size_t row) const { + Edge edge; + edge.start_ = table_(row, startCol_); + edge.end_ = table_(row, endCol_); + edge.edgeRow_ = row; + + return edge; +} + +// _____________________________________________________________________________ +PathSearch::PathSearch(QueryExecutionContext* qec, + std::shared_ptr subtree, + PathSearchConfiguration config) + : Operation(qec), subtree_(std::move(subtree)), config_(std::move(config)) { + AD_CORRECTNESS_CHECK(qec != nullptr); + + auto startCol = subtree_->getVariableColumn(config_.start_); + auto endCol = subtree_->getVariableColumn(config_.end_); + subtree_ = QueryExecutionTree::createSortedTree(subtree_, {startCol, endCol}); + + resultWidth_ = 4 + config_.edgeProperties_.size(); + + size_t colIndex = 0; + + variableColumns_[config_.start_] = makeAlwaysDefinedColumn(colIndex); + colIndex++; + variableColumns_[config_.end_] = makeAlwaysDefinedColumn(colIndex); + colIndex++; + variableColumns_[config_.pathColumn_] = makeAlwaysDefinedColumn(colIndex); + colIndex++; + variableColumns_[config_.edgeColumn_] = makeAlwaysDefinedColumn(colIndex); + colIndex++; + + if (std::holds_alternative(config_.sources_)) { + resultWidth_++; + const auto& sourceColumn = std::get(config_.sources_); + variableColumns_[sourceColumn] = makeAlwaysDefinedColumn(colIndex); + colIndex++; + } + + if (std::holds_alternative(config_.targets_)) { + resultWidth_++; + const auto& targetColumn = std::get(config_.targets_); + variableColumns_[targetColumn] = 
makeAlwaysDefinedColumn(colIndex); + colIndex++; + } + + for (const auto& edgeProperty : config_.edgeProperties_) { + auto subVarCols = subtree_->getVariableColumns(); + auto colInfo = subVarCols[edgeProperty]; + variableColumns_[edgeProperty] = {colIndex, colInfo.mightContainUndef_}; + colIndex++; + } +} + +// _____________________________________________________________________________ +std::vector PathSearch::getChildren() { + std::vector res; + res.push_back(subtree_.get()); + + if (sourceAndTargetTree_.has_value()) { + res.push_back(sourceAndTargetTree_.value().get()); + } else { + if (sourceTree_.has_value()) { + res.push_back(sourceTree_.value().get()); + } + + if (targetTree_.has_value()) { + res.push_back(targetTree_.value().get()); + } + } + + return res; +}; + +// _____________________________________________________________________________ +std::string PathSearch::getCacheKeyImpl() const { + std::ostringstream os; + os << "PathSearch:\n"; + os << config_.toString(); + + AD_CORRECTNESS_CHECK(subtree_); + os << "Subtree:\n" << subtree_->getCacheKey() << '\n'; + + if (sourceTree_.has_value()) { + os << "Source Side subtree:\n" + << sourceTree_.value()->getCacheKey() << '\n'; + } + + if (targetTree_.has_value()) { + os << "Target Side subtree:\n" + << targetTree_.value()->getCacheKey() << '\n'; + } + + if (sourceAndTargetTree_.has_value()) { + os << "Source And Target Side subtree:\n" + << sourceAndTargetTree_.value()->getCacheKey() << '\n'; + } + + return std::move(os).str(); +}; + +// _____________________________________________________________________________ +string PathSearch::getDescriptor() const { + std::ostringstream os; + os << "PathSearch"; + return std::move(os).str(); +}; + +// _____________________________________________________________________________ +size_t PathSearch::getResultWidth() const { return resultWidth_; }; + +// _____________________________________________________________________________ +size_t PathSearch::getCostEstimate() { 
+ // TODO: Figure out a smart way to estimate cost + return 1000; +}; + +// _____________________________________________________________________________ +uint64_t PathSearch::getSizeEstimateBeforeLimit() { + // TODO: Figure out a smart way to estimate size + return 1000; +}; + +// _____________________________________________________________________________ +float PathSearch::getMultiplicity(size_t col) { + (void)col; + return 1; +}; + +// _____________________________________________________________________________ +bool PathSearch::knownEmptyResult() { + for (auto child : getChildren()) { + if (child->knownEmptyResult()) { + return true; + } + } + return false; +}; + +// _____________________________________________________________________________ +vector PathSearch::resultSortedOn() const { return {}; }; + +// _____________________________________________________________________________ +void PathSearch::bindSourceSide(std::shared_ptr sourcesOp, + size_t inputCol) { + sourceTree_ = sourcesOp; + sourceCol_ = inputCol; +} + +// _____________________________________________________________________________ +void PathSearch::bindTargetSide(std::shared_ptr targetsOp, + size_t inputCol) { + targetTree_ = targetsOp; + targetCol_ = inputCol; +} + +// _____________________________________________________________________________ +void PathSearch::bindSourceAndTargetSide( + std::shared_ptr sourceAndTargetOp, size_t sourceCol, + size_t targetCol) { + sourceAndTargetTree_ = sourceAndTargetOp; + sourceCol_ = sourceCol; + targetCol_ = targetCol; +} + +// _____________________________________________________________________________ +Result PathSearch::computeResult([[maybe_unused]] bool requestLaziness) { + std::shared_ptr subRes = subtree_->getResult(); + IdTable idTable{allocator()}; + idTable.setNumColumns(getResultWidth()); + + const IdTable& dynSub = subRes->idTable(); + if (!dynSub.empty()) { + auto timer = ad_utility::Timer(ad_utility::Timer::Started); + + auto 
subStartColumn = subtree_->getVariableColumn(config_.start_); + auto subEndColumn = subtree_->getVariableColumn(config_.end_); + std::vector edgeColumns; + for (const auto& edgeProp : config_.edgeProperties_) { + edgeColumns.push_back(subtree_->getVariableColumn(edgeProp)); + } + BinSearchWrapper binSearch{dynSub, subStartColumn, subEndColumn, + std::move(edgeColumns)}; + + timer.stop(); + auto buildingTime = timer.msecs(); + timer.start(); + + auto [sources, targets] = handleSearchSides(); + + timer.stop(); + auto sideTime = timer.msecs(); + timer.start(); + + PathsLimited paths{allocator()}; + std::vector allSources; + if (sources.empty()) { + allSources = binSearch.getSources(); + sources = allSources; + } + paths = allPaths(sources, targets, binSearch, config_.cartesian_); + + timer.stop(); + auto searchTime = timer.msecs(); + timer.start(); + + CALL_FIXED_SIZE(std::array{getResultWidth()}, + &PathSearch::pathsToResultTable, this, idTable, paths, + binSearch); + + timer.stop(); + auto fillTime = timer.msecs(); + timer.start(); + + auto& info = runtimeInfo(); + info.addDetail("Time to build graph & mapping", buildingTime.count()); + info.addDetail("Time to prepare search sides", sideTime.count()); + info.addDetail("Time to search paths", searchTime.count()); + info.addDetail("Time to fill result table", fillTime.count()); + } + + return {std::move(idTable), resultSortedOn(), subRes->getSharedLocalVocab()}; +}; + +// _____________________________________________________________________________ +VariableToColumnMap PathSearch::computeVariableToColumnMap() const { + return variableColumns_; +}; + +// _____________________________________________________________________________ +std::pair, std::span> +PathSearch::handleSearchSides() const { + std::span sourceIds; + std::span targetIds; + + if (sourceAndTargetTree_.has_value()) { + auto resultTable = sourceAndTargetTree_.value()->getResult(); + sourceIds = resultTable->idTable().getColumn(sourceCol_.value()); + 
targetIds = resultTable->idTable().getColumn(targetCol_.value()); + return {sourceIds, targetIds}; + } + + if (sourceTree_.has_value()) { + sourceIds = sourceTree_.value()->getResult()->idTable().getColumn( + sourceCol_.value()); + } else if (config_.sourceIsVariable()) { + sourceIds = {}; + } else { + sourceIds = std::get>(config_.sources_); + } + + if (targetTree_.has_value()) { + targetIds = targetTree_.value()->getResult()->idTable().getColumn( + targetCol_.value()); + } else if (config_.targetIsVariable()) { + targetIds = {}; + } else { + targetIds = std::get>(config_.targets_); + } + + return {sourceIds, targetIds}; +} + +// _____________________________________________________________________________ +PathsLimited PathSearch::findPaths(const Id& source, + const std::unordered_set& targets, + const BinSearchWrapper& binSearch) const { + std::vector edgeStack; + Path currentPath{EdgesLimited(allocator())}; + // Iterative depth-first search over the outgoing edges of `source`. + // `visited` tracks `source` and the end nodes of the edges that are + // currently on `currentPath`. An empty `targets` set means that every + // node that is reached counts as a target. + PathsLimited result{allocator()}; + std::unordered_set, std::equal_to, + ad_utility::AllocatorWithLimit> + visited{allocator()}; + + visited.insert(source.getBits()); + for (auto edge : binSearch.outgoingEdes(source)) { + edgeStack.push_back(std::move(edge)); + } + + while (!edgeStack.empty()) { + checkCancellation(); + auto edge = edgeStack.back(); + edgeStack.pop_back(); + + while (!currentPath.empty() && edge.start_ != currentPath.end()) { + visited.erase(currentPath.end().getBits()); + currentPath.pop_back(); + } + // Mark the end node only after backtracking. Marking it earlier lets the + // loop above erase the mark again if the node was on the abandoned branch. + visited.insert(edge.end_.getBits()); + currentPath.push_back(edge); + + if (targets.empty() || targets.contains(edge.end_.getBits())) { + result.push_back(currentPath); + } + + for (const auto& outgoingEdge : binSearch.outgoingEdes(edge.end_)) { + if (!visited.contains(outgoingEdge.end_.getBits())) { + edgeStack.push_back(outgoingEdge); + } + } + } + + return result; +} + +// 
_____________________________________________________________________________ +PathsLimited PathSearch::allPaths(std::span sources, + std::span targets, + const BinSearchWrapper& binSearch, + bool cartesian) const { + PathsLimited paths{allocator()}; + + // Pairwise mode needs equal sizes, else each source is tried with all targets. + if (cartesian || sources.size() != targets.size()) { + std::unordered_set targetSet; + for (auto target : targets) { + targetSet.insert(target.getBits()); + } + for (auto source : sources) { + for (auto& path : findPaths(source, targetSet, binSearch)) { + paths.push_back(std::move(path)); + } + } + } else { + for (size_t i = 0; i < sources.size(); i++) { + for (auto& path : + findPaths(sources[i], {targets[i].getBits()}, binSearch)) { + paths.push_back(std::move(path)); + } + } + } + return paths; +} + +// _____________________________________________________________________________ +template +void PathSearch::pathsToResultTable(IdTable& tableDyn, PathsLimited& paths, + const BinSearchWrapper& binSearch) const { + IdTableStatic table = std::move(tableDyn).toStatic(); + + std::vector edgePropertyCols; + for (const auto& edgeVar : config_.edgeProperties_) { + auto edgePropertyCol = variableColumns_.at(edgeVar).columnIndex_; + edgePropertyCols.push_back(edgePropertyCol); + } + + size_t rowIndex = 0; + for (size_t pathIndex = 0; pathIndex < paths.size(); pathIndex++) { + const auto& path = paths[pathIndex]; + + std::optional sourceId = std::nullopt; + if (config_.sourceIsVariable()) { + sourceId = path.edges_.front().start_; + } + + std::optional targetId = std::nullopt; + if (config_.targetIsVariable()) { + targetId = path.edges_.back().end_; + } + + for (size_t edgeIndex = 0; edgeIndex < path.size(); edgeIndex++) { + checkCancellation(); + auto edge = path.edges_[edgeIndex]; + table.emplace_back(); + table(rowIndex, getStartIndex()) = edge.start_; + table(rowIndex, getEndIndex()) = edge.end_; + table(rowIndex, getPathIndex()) = Id::makeFromInt(pathIndex); + table(rowIndex, getEdgeIndex()) = 
Id::makeFromInt(edgeIndex); + + if (sourceId) { + table(rowIndex, getSourceIndex().value()) = sourceId.value(); + } + + if (targetId) { + table(rowIndex, getTargetIndex().value()) = targetId.value(); + } + + auto edgeProperties = binSearch.getEdgeProperties(edge); + for (size_t edgePropertyIndex = 0; + edgePropertyIndex < edgeProperties.size(); edgePropertyIndex++) { + table(rowIndex, edgePropertyCols[edgePropertyIndex]) = + edgeProperties[edgePropertyIndex]; + } + + rowIndex++; + } + } + + tableDyn = std::move(table).toDynamic(); +} diff --git a/src/engine/PathSearch.h b/src/engine/PathSearch.h new file mode 100644 index 0000000000..9e330d1d4e --- /dev/null +++ b/src/engine/PathSearch.h @@ -0,0 +1,282 @@ +// Copyright 2024, University of Freiburg, +// Chair of Algorithms and Data Structures. +// Author: Johannes Herrmann (johannes.r.herrmann(at)gmail.com) + +#pragma once + +#include +#include +#include +#include +#include + +#include "engine/Operation.h" +#include "global/Id.h" +#include "util/AllocatorWithLimit.h" + +enum class PathSearchAlgorithm { ALL_PATHS }; + +/** + * @brief Represents the source or target side of a PathSearch. + * The side can either be a variable or a list of Ids. + */ +using SearchSide = std::variant>; + +namespace pathSearch { +struct Edge { + Id start_; + + Id end_; + // Row of this edge in the table wrapped by `BinSearchWrapper`. + size_t edgeRow_; +}; + +using EdgesLimited = std::vector>; + +struct Path { + EdgesLimited edges_; + + bool empty() const { return edges_.empty(); } + + size_t size() const { return edges_.size(); } + + void push_back(const Edge& edge) { edges_.push_back(edge); } + + void pop_back() { edges_.pop_back(); } + // End node of the last edge; must not be called on an empty path. + const Id& end() const { return edges_.back().end_; } +}; + +using PathsLimited = std::vector>; + +/** + * @class BinSearchWrapper + * @brief Encapsulates logic for binary search of edges in + * an IdTable. 
It provides methods to find outgoing edges from + * a node and to retrieve the start nodes and the properties of edges. + * + */ +class BinSearchWrapper { + const IdTable& table_; + size_t startCol_; + size_t endCol_; + std::vector edgeCols_; + + public: + BinSearchWrapper(const IdTable& table, size_t startCol, size_t endCol, + std::vector edgeCols); + + /** + * @brief Return all outgoing edges of a node + * + * @param node The start node of the outgoing edges + */ + std::vector outgoingEdes(const Id node) const; + + /** + * @brief Returns the start nodes of all edges. + * In case the sources field for the path search is empty, + * the search starts from all possible sources (i.e. all + * start nodes). Returns only unique start nodes. + */ + std::vector getSources() const; + // Returns the values of the edge property columns in the row of `edge`. + std::vector getEdgeProperties(const Edge& edge) const; + + private: + Edge makeEdgeFromRow(size_t row) const; +}; +} // namespace pathSearch + +struct PathSearchConfiguration { + PathSearchAlgorithm algorithm_; + SearchSide sources_; + SearchSide targets_; + Variable start_; + Variable end_; + Variable pathColumn_; + Variable edgeColumn_; + std::vector edgeProperties_; + bool cartesian_ = true; + + bool sourceIsVariable() const { + return std::holds_alternative(sources_); + } + bool targetIsVariable() const { + return std::holds_alternative(targets_); + } + + std::string searchSideToString(const SearchSide& side) const { + if (std::holds_alternative(side)) { + return std::get(side).toSparql(); + } + std::ostringstream os; + for (auto id : std::get>(side)) { + os << id << ", "; + } + return std::move(os).str(); + } + + std::string toString() const { + std::ostringstream os; + if (algorithm_ == PathSearchAlgorithm::ALL_PATHS) { + os << "Algorithm: All paths" << '\n'; + } + + os << "Source: " << searchSideToString(sources_) << '\n'; + os << "Target: " << searchSideToString(targets_) << '\n'; + + os << "Start: " << start_.toSparql() << '\n'; + os << "End: " << end_.toSparql() << '\n'; + os << "PathColumn: " << pathColumn_.toSparql() << '\n'; + 
os << "EdgeColumn: " << edgeColumn_.toSparql() << '\n'; + + os << "EdgeProperties:" << '\n'; + for (const auto& edgeProperty : edgeProperties_) { + os << " " << edgeProperty.toSparql() << '\n'; + } + + return std::move(os).str(); + } +}; + +/** + * @class PathSearch + * @brief Main class implementing the path search operation. + * It manages the configuration, executes the search and + * builds the ResultTable. + * + */ +class PathSearch : public Operation { + std::shared_ptr subtree_; + size_t resultWidth_; + VariableToColumnMap variableColumns_; + + PathSearchConfiguration config_; + + // The following optional fields are filled, depending + // on how the PathSearch is bound. + std::optional sourceCol_; + std::optional targetCol_; + + std::optional> sourceTree_; + std::optional> targetTree_; + std::optional> sourceAndTargetTree_; + + public: + PathSearch(QueryExecutionContext* qec, + std::shared_ptr subtree, + PathSearchConfiguration config); + + std::vector getChildren() override; + + const PathSearchConfiguration& getConfig() const { return config_; } + + ColumnIndex getStartIndex() const { + return variableColumns_.at(config_.start_).columnIndex_; + } + ColumnIndex getEndIndex() const { + return variableColumns_.at(config_.end_).columnIndex_; + } + ColumnIndex getPathIndex() const { + return variableColumns_.at(config_.pathColumn_).columnIndex_; + } + ColumnIndex getEdgeIndex() const { + return variableColumns_.at(config_.edgeColumn_).columnIndex_; + } + std::optional getSourceIndex() const { + if (!config_.sourceIsVariable()) { + return std::nullopt; + } + const auto& sourceVar = std::get(config_.sources_); + return variableColumns_.at(sourceVar).columnIndex_; + } + std::optional getTargetIndex() const { + if (!config_.targetIsVariable()) { + return std::nullopt; + } + const auto& targetVar = std::get(config_.targets_); + return variableColumns_.at(targetVar).columnIndex_; + } + + string getCacheKeyImpl() const override; + string getDescriptor() const 
override; + size_t getResultWidth() const override; + + size_t getCostEstimate() override; + + uint64_t getSizeEstimateBeforeLimit() override; + float getMultiplicity(size_t col) override; + bool knownEmptyResult() override; + + vector resultSortedOn() const override; + + void bindSourceSide(std::shared_ptr sourcesOp, + size_t inputCol); + void bindTargetSide(std::shared_ptr targetsOp, + size_t inputCol); + + void bindSourceAndTargetSide( + std::shared_ptr sourceAndTargetOp, size_t sourceCol, + size_t targetCol); + + bool isSourceBound() const { + return sourceTree_.has_value() || sourceAndTargetTree_.has_value() || + !config_.sourceIsVariable(); + } + + bool isTargetBound() const { + return targetTree_.has_value() || sourceAndTargetTree_.has_value() || + !config_.targetIsVariable(); + } + + std::optional getSourceColumn() const { + if (!config_.sourceIsVariable()) { + return std::nullopt; + } + + return variableColumns_.at(std::get(config_.sources_)) + .columnIndex_; + } + + std::optional getTargetColumn() const { + if (!config_.targetIsVariable()) { + return std::nullopt; + } + + return variableColumns_.at(std::get(config_.targets_)) + .columnIndex_; + } + + Result computeResult([[maybe_unused]] bool requestLaziness) override; + VariableToColumnMap computeVariableToColumnMap() const override; + + private: + std::pair, std::span> handleSearchSides() const; + + /** + * @brief Searches depth-first for the paths from `source` to any of `targets`. + * @return A vector of paths; if `targets` is empty, every path found is kept. + */ + pathSearch::PathsLimited findPaths( + const Id& source, const std::unordered_set& targets, + const pathSearch::BinSearchWrapper& binSearch) const; + + /** + * @brief Runs `findPaths` per source, against all targets or pairwise. + * @return The concatenated paths of all individual searches. + */ + pathSearch::PathsLimited allPaths( + std::span sources, std::span targets, + const pathSearch::BinSearchWrapper& binSearch, bool cartesian) const; + + /** + * @brief Converts paths to a result table with a specified width. + * @tparam WIDTH The width of the result table. 
+ * @param tableDyn The dynamic table to store the results. + * @param paths The vector of paths to convert. + */ + template + void pathsToResultTable(IdTable& tableDyn, pathSearch::PathsLimited& paths, + const pathSearch::BinSearchWrapper& binSearch) const; +}; diff --git a/src/engine/QueryPlanner.cpp b/src/engine/QueryPlanner.cpp index aeb247d45e..b63958e198 100644 --- a/src/engine/QueryPlanner.cpp +++ b/src/engine/QueryPlanner.cpp @@ -9,6 +9,7 @@ #include #include +#include #include "engine/Bind.h" #include "engine/CartesianProductJoin.h" @@ -26,6 +27,7 @@ #include "engine/NeutralElementOperation.h" #include "engine/OptionalJoin.h" #include "engine/OrderBy.h" +#include "engine/PathSearch.h" #include "engine/Service.h" #include "engine/Sort.h" #include "engine/SpatialJoin.h" @@ -37,9 +39,11 @@ #include "engine/Values.h" #include "engine/sparqlExpressions/LiteralExpression.h" #include "engine/sparqlExpressions/RelationalExpressions.h" +#include "global/Id.h" #include "global/RuntimeParameters.h" #include "parser/Alias.h" #include "parser/SparqlParserHelpers.h" +#include "util/Exception.h" namespace p = parsedQuery; namespace { @@ -1820,6 +1824,11 @@ std::vector QueryPlanner::createJoinCandidates( return {makeSubtreePlan(_qec, a._qet, b._qet)}; } + if (auto opt = createJoinWithPathSearch(a, b, jcs)) { + candidates.push_back(std::move(opt.value())); + return candidates; + } + // Check if one of the two Operations is a SERVICE. If so, we can try // to simplify the Service Query using the result of the other operation. 
if (auto opt = createJoinWithService(a, b, jcs)) { @@ -2064,6 +2073,107 @@ auto QueryPlanner::createSubtreeWithService(const SubtreePlan& a, return plan; } +// _____________________________________________________________________ +// Try to answer the join of `a` and `b` by binding the non-path-search side +// to the source and/or target of the path search operation. Returns +// `std::nullopt` whenever nothing can be bound, so that the caller falls +// back to its other join candidates. +auto QueryPlanner::createJoinWithPathSearch( + const SubtreePlan& a, const SubtreePlan& b, + const std::vector>& jcs) + -> std::optional { + auto aRootOp = + std::dynamic_pointer_cast(a._qet->getRootOperation()); + auto bRootOp = + std::dynamic_pointer_cast(b._qet->getRootOperation()); + + // Exactly one of the two Operations can be a path search. + if (static_cast(aRootOp) == static_cast(bRootOp)) { + return std::nullopt; + } + + auto pathSearch = aRootOp ? aRootOp : bRootOp; + auto sibling = bRootOp ? a : b; + + auto decideColumns = [aRootOp](std::array joinColumns) + -> std::pair { + auto thisCol = aRootOp ? joinColumns[0] : joinColumns[1]; + auto otherCol = aRootOp ? joinColumns[1] : joinColumns[0]; + return {thisCol, otherCol}; + }; + + // Only source and target may be bound directly + if (jcs.size() > 2) { + return std::nullopt; + } + + auto sourceColumn = pathSearch->getSourceColumn(); + auto targetColumn = pathSearch->getTargetColumn(); + + // Either source or target column have to be a variable to create a join + if (!sourceColumn && !targetColumn) { + return std::nullopt; + } + + // A join on an edge property column should not create any candidates + auto isJoinOnSourceOrTarget = [sourceColumn, + targetColumn](size_t joinColumn) { + return ((sourceColumn && sourceColumn.value() == joinColumn) || + (targetColumn && targetColumn.value() == joinColumn)); + }; + + if (jcs.size() == 2) { + // To join source and target, both must be variables + if (!sourceColumn || !targetColumn) { + return std::nullopt; + } + + auto [firstCol, firstOtherCol] = decideColumns(jcs[0]); + + auto [secondCol, secondOtherCol] = decideColumns(jcs[1]); + + if (!isJoinOnSourceOrTarget(firstCol) && + !isJoinOnSourceOrTarget(secondCol)) { + return std::nullopt; +
} + + if (sourceColumn == firstCol && targetColumn == secondCol) { + pathSearch->bindSourceAndTargetSide(sibling._qet, firstOtherCol, + secondOtherCol); + } else if (sourceColumn == secondCol && targetColumn == firstCol) { + pathSearch->bindSourceAndTargetSide(sibling._qet, secondOtherCol, + firstOtherCol); + } else { + return std::nullopt; + } + } else if (jcs.size() == 1) { + auto [thisCol, otherCol] = decideColumns(jcs[0]); + + if (!isJoinOnSourceOrTarget(thisCol)) { + return std::nullopt; + } + + if (sourceColumn && sourceColumn == thisCol && + !pathSearch->isSourceBound()) { + pathSearch->bindSourceSide(sibling._qet, otherCol); + } else if (targetColumn && targetColumn == thisCol && + !pathSearch->isTargetBound()) { + pathSearch->bindTargetSide(sibling._qet, otherCol); + } else { + // The matching side is already bound. Returning a plan here would + // silently drop the sibling from the query, so leave the join to the + // caller's other candidates. + return std::nullopt; + } + } else { + return std::nullopt; + } + + SubtreePlan plan = makeSubtreePlan(pathSearch); + mergeSubtreePlanIds(plan, a, b); + return plan; +} + // _____________________________________________________________________ void QueryPlanner::QueryGraph::setupGraph( const std::vector& leafOperations) { @@ -2289,6 +2399,8 @@ void QueryPlanner::GraphPatternPlanner::graphPatternOperationVisitor(Arg& arg) { c.type = SubtreePlan::MINUS; } visitGroupOptionalOrMinus(std::move(candidates)); + } else if constexpr (std::is_same_v) { + visitPathSearch(arg); } else { static_assert(std::is_same_v); visitBasicGraphPattern(arg); @@ -2396,6 +2508,26 @@ void QueryPlanner::GraphPatternPlanner::visitTransitivePath( visitGroupOptionalOrMinus(std::move(candidatesOut)); } +// _______________________________________________________________ +// Plan the child graph pattern (the edges) and wrap each resulting candidate +// in a path search operation configured from `pathQuery`. +void QueryPlanner::GraphPatternPlanner::visitPathSearch( + parsedQuery::PathQuery& pathQuery) { + auto candidatesIn = planner_.optimize(&pathQuery.childGraphPattern_); + std::vector candidatesOut; + + const auto& vocab = planner_._qec->getIndex().getVocab(); + auto config = pathQuery.toPathSearchConfiguration(vocab); + + for (auto& sub : candidatesIn) { + auto pathSearch = +
std::make_shared(qec_, std::move(sub._qet), config); + auto plan = makeSubtreePlan(std::move(pathSearch)); + candidatesOut.push_back(std::move(plan)); + } + visitGroupOptionalOrMinus(std::move(candidatesOut)); +} + // _______________________________________________________________ void QueryPlanner::GraphPatternPlanner::visitUnion(parsedQuery::Union& arg) { // TODO here we could keep all the candidates, and create a diff --git a/src/engine/QueryPlanner.h b/src/engine/QueryPlanner.h index 360a40d556..8d3a13cd3b 100644 --- a/src/engine/QueryPlanner.h +++ b/src/engine/QueryPlanner.h @@ -344,6 +344,10 @@ class QueryPlanner { const SubtreePlan& a, const SubtreePlan& b, const std::vector>& jcs); + [[nodiscard]] static std::optional createJoinWithPathSearch( + const SubtreePlan& a, const SubtreePlan& b, + const std::vector>& jcs); + template [[nodiscard]] static std::optional createSubtreeWithService( const SubtreePlan& a, const SubtreePlan& b); @@ -532,6 +536,7 @@ class QueryPlanner { void visitBasicGraphPattern(const parsedQuery::BasicGraphPattern& pattern); void visitBind(const parsedQuery::Bind& bind); void visitTransitivePath(parsedQuery::TransPath& transitivePath); + void visitPathSearch(parsedQuery::PathQuery& config); void visitUnion(parsedQuery::Union& un); void visitSubquery(parsedQuery::Subquery& subquery); diff --git a/src/parser/GraphPatternOperation.cpp b/src/parser/GraphPatternOperation.cpp index bddd21c5e9..efceda159c 100644 --- a/src/parser/GraphPatternOperation.cpp +++ b/src/parser/GraphPatternOperation.cpp @@ -5,9 +5,14 @@ #include "parser/GraphPatternOperation.h" +#include +#include + #include "absl/strings/str_cat.h" #include "absl/strings/str_join.h" #include "parser/ParsedQuery.h" +#include "parser/TripleComponent.h" +#include "util/Exception.h" #include "util/Forward.h" namespace parsedQuery { @@ -67,6 +72,146 @@ void BasicGraphPattern::appendTriples(BasicGraphPattern other) { ad_utility::appendVector(_triples, std::move(other._triples)); } +//
____________________________________________________________________________ +void PathQuery::addParameter(const SparqlTriple& triple) { + auto simpleTriple = triple.getSimple(); + TripleComponent predicate = simpleTriple.p_; + TripleComponent object = simpleTriple.o_; + + if (!predicate.isIri()) { + throw PathSearchException("Predicates must be IRIs"); + } + + auto getVariable = [](std::string_view parameter, + const TripleComponent& object) { + if (!object.isVariable()) { + throw PathSearchException(absl::StrCat("The value ", object.toString(), + " for parameter '", parameter, + "' has to be a variable")); + } + + return object.getVariable(); + }; + + auto setVariable = [&](std::string_view parameter, + const TripleComponent& object, + std::optional& existingValue) { + auto variable = getVariable(parameter, object); + + if (existingValue.has_value()) { + throw PathSearchException(absl::StrCat( + "The parameter '", parameter, "' has already been set to variable: '", + existingValue.value().toSparql(), "'. 
New variable: '", + object.toString(), "'.")); + } + + existingValue = std::move(variable); + }; + + std::string predString = predicate.getIri().toStringRepresentation(); + if (predString.ends_with("source>")) { + sources_.push_back(std::move(object)); + } else if (predString.ends_with("target>")) { + targets_.push_back(std::move(object)); + } else if (predString.ends_with("start>")) { + setVariable("start", object, start_); + } else if (predString.ends_with("end>")) { + setVariable("end", object, end_); + } else if (predString.ends_with("pathColumn>")) { + setVariable("pathColumn", object, pathColumn_); + } else if (predString.ends_with("edgeColumn>")) { + setVariable("edgeColumn", object, edgeColumn_); + } else if (predString.ends_with("edgeProperty>")) { + edgeProperties_.push_back(getVariable("edgeProperty", object)); + } else if (predString.ends_with("cartesian>")) { + if (!object.isBool()) { + throw PathSearchException("The parameter 'cartesian' expects a boolean"); + } + cartesian_ = object.getBool(); + } else if (predString.ends_with("algorithm>")) { + if (!object.isIri()) { + throw PathSearchException("The 'algorithm' value has to be an Iri"); + } + auto objString = object.getIri().toStringRepresentation(); + + if (objString.ends_with("allPaths>")) { + algorithm_ = PathSearchAlgorithm::ALL_PATHS; + } else { + throw PathSearchException( + "Unsupported algorithm in pathSearch: " + objString + + ". Supported Algorithms: " + "allPaths."); + } + } else { + throw PathSearchException( + "Unsupported argument " + predString + + " in PathSearch. 
" + "Supported Arguments: source, target, start, end, " + "pathColumn, edgeColumn, " + "edgeProperty, cartesian, algorithm."); + } +} + +// ____________________________________________________________________________ +std::variant> PathQuery::toSearchSide( + std::vector side, const Index::Vocab& vocab) const { + if (side.size() == 1 && side[0].isVariable()) { + return side[0].getVariable(); + } else { + std::vector sideIds; + for (const auto& comp : side) { + if (comp.isVariable()) { + throw PathSearchException( + "Only one variable is allowed per search side"); + } + auto opt = comp.toValueId(vocab); + if (opt.has_value()) { + sideIds.push_back(opt.value()); + } else { + throw PathSearchException("No vocabulary entry for " + comp.toString()); + } + } + return sideIds; + } +} + +// ____________________________________________________________________________ +void PathQuery::addBasicPattern(const BasicGraphPattern& pattern) { + for (const SparqlTriple& triple : pattern._triples) { + addParameter(triple); + } +} + +// ____________________________________________________________________________ +void PathQuery::addGraph(const GraphPatternOperation& op) { + if (childGraphPattern_._graphPatterns.empty()) { + auto pattern = std::get(op); + childGraphPattern_ = std::move(pattern._child); + } +} + +// ____________________________________________________________________________ +PathSearchConfiguration PathQuery::toPathSearchConfiguration( + const Index::Vocab& vocab) const { + auto sources = toSearchSide(sources_, vocab); + auto targets = toSearchSide(targets_, vocab); + + if (!start_.has_value()) { + throw PathSearchException("Missing parameter 'start' in path search."); + } else if (!end_.has_value()) { + throw PathSearchException("Missing parameter 'end' in path search."); + } else if (!pathColumn_.has_value()) { + throw PathSearchException("Missing parameter 'pathColumn' in path search."); + } else if (!edgeColumn_.has_value()) { + throw PathSearchException("Missing parameter 
'edgeColumn' in path search."); + } + + return PathSearchConfiguration{ + algorithm_, sources, targets, + start_.value(), end_.value(), pathColumn_.value(), + edgeColumn_.value(), edgeProperties_, cartesian_}; +} + // ____________________________________________________________________________ cppcoro::generator Bind::containedVariables() const { for (const auto* ptr : _expression.containedVariables()) { diff --git a/src/parser/GraphPatternOperation.h b/src/parser/GraphPatternOperation.h index df95f80e89..6367d4e510 100644 --- a/src/parser/GraphPatternOperation.h +++ b/src/parser/GraphPatternOperation.h @@ -8,6 +8,7 @@ #include #include +#include "engine/PathSearch.h" #include "engine/sparqlExpressions/SparqlExpressionPimpl.h" #include "parser/GraphPattern.h" #include "parser/TripleComponent.h" @@ -143,6 +144,91 @@ struct TransPath { GraphPattern _childGraphPattern; }; +class PathSearchException : public std::exception { + std::string message_; + + public: + explicit PathSearchException(const std::string& message) + : message_(message) {} + const char* what() const noexcept override { return message_.c_str(); } +}; + +// The PathQuery object holds intermediate information for the PathSearch. +// The PathSearchConfiguration requires concrete Ids. The vocabulary from the +// QueryPlanner is needed to translate the TripleComponents to ValueIds. +// Also, the members of the PathQuery have defaults and can be set after +// the object creation, simplifying the parsing process. If a required +// value has not been set during parsing, the method 'toPathSearchConfiguration' +// will throw an exception. +// All the error handling for the PathSearch happens in the PathQuery object. +// Thus, if a PathSearchConfiguration can be constructed, it is valid. 
+struct PathQuery { + std::vector sources_; + std::vector targets_; + std::optional start_; + std::optional end_; + std::optional pathColumn_; + std::optional edgeColumn_; + std::vector edgeProperties_; + PathSearchAlgorithm algorithm_ = PathSearchAlgorithm::ALL_PATHS; + + GraphPattern childGraphPattern_; + bool cartesian_ = true; + + /** + * @brief Add a parameter to the PathQuery from the given triple. + * The predicate of the triple determines the parameter name and the object + * of the triple determines the parameter value. The subject is ignored. + * Throws a PathSearchException if an unsupported algorithm is given or if the + * predicate contains an unknown parameter name. + * + * @param triple A SparqlTriple that contains the parameter info + */ + void addParameter(const SparqlTriple& triple); + + /** + * @brief Add the parameters from a BasicGraphPattern to the PathQuery + * + * @param pattern The triples, each of which is passed to `addParameter` + */ + void addBasicPattern(const BasicGraphPattern& pattern); + + /** + * @brief Add a GraphPatternOperation to the PathQuery. The pattern specifies + * the edges of the graph that is used by the path search + * + * @param childGraphPattern Ignored if a non-empty graph was already added + */ + void addGraph(const GraphPatternOperation& childGraphPattern); + + /** + * @brief Convert the vector of triple components into a SearchSide + * The SearchSide can either be a variable or a list of Ids. + * A PathSearchException is thrown if more than one variable is given. + * + * @param side A vector of TripleComponents, containing either exactly one + * Variable or zero or more ValueIds + * @param vocab A Vocabulary containing the Ids of the TripleComponents. + * The Vocab is only used if the given vector contains IRIs. + */ + std::variant> toSearchSide( + std::vector side, const Index::Vocab& vocab) const; + + /** + * @brief Convert this PathQuery into a PathSearchConfiguration object. + * This method checks if all required parameters are set and converts + * the PathSearch sources and targets into SearchSides. 
+ * A PathSearchException is thrown if required parameters are missing. + * The required parameters are start, end, pathColumn and edgeColumn. + * + * @param vocab A vocab containing the Ids of the IRIs in + * sources_ and targets_ + * @return A valid PathSearchConfiguration + */ + PathSearchConfiguration toPathSearchConfiguration( + const Index::Vocab& vocab) const; +}; + // A SPARQL Bind construct. struct Bind { sparqlExpression::SparqlExpressionPimpl _expression; @@ -159,7 +245,7 @@ struct Bind { // class actually becomes `using GraphPatternOperation = std::variant<...>` using GraphPatternOperationVariant = std::variant; + Values, Service, PathQuery, Minus, GroupGraphPattern>; struct GraphPatternOperation : public GraphPatternOperationVariant, public VisitMixin { diff --git a/src/parser/TripleComponent.h b/src/parser/TripleComponent.h index 85450c9975..fb874fc3c1 100644 --- a/src/parser/TripleComponent.h +++ b/src/parser/TripleComponent.h @@ -137,6 +137,12 @@ class TripleComponent { return std::holds_alternative(_variant); } + [[nodiscard]] bool isBool() const { + return std::holds_alternative(_variant); + } + + bool getBool() const { return std::get(_variant); } + bool isLiteral() const { return std::holds_alternative(_variant); } Literal& getLiteral() { return std::get(_variant); } const Literal& getLiteral() const { return std::get(_variant); } diff --git a/src/parser/sparqlParser/SparqlQleverVisitor.cpp b/src/parser/sparqlParser/SparqlQleverVisitor.cpp index 777f29714d..65de837da2 100644 --- a/src/parser/sparqlParser/SparqlQleverVisitor.cpp +++ b/src/parser/sparqlParser/SparqlQleverVisitor.cpp @@ -24,6 +24,7 @@ #include "engine/sparqlExpressions/RelationalExpressions.h" #include "engine/sparqlExpressions/SampleExpression.h" #include "engine/sparqlExpressions/UuidExpressions.h" +#include "parser/GraphPatternOperation.h" #include "parser/RdfParser.h" #include "parser/SparqlParser.h" #include "parser/TokenizerCtre.h" @@ -708,8 +709,33 @@ GraphPatternOperation 
Visitor::visit(Parser::OptionalGraphPatternContext* ctx) { return GraphPatternOperation{parsedQuery::Optional{std::move(pattern)}}; } +GraphPatternOperation Visitor::visitPathQuery( + Parser::ServiceGraphPatternContext* ctx) { + auto parsePathQuery = [](parsedQuery::PathQuery& pathQuery, + const parsedQuery::GraphPatternOperation& op) { + if (std::holds_alternative(op)) { + pathQuery.addBasicPattern(std::get(op)); + } else if (std::holds_alternative(op)) { + pathQuery.addGraph(op); + } else { + throw parsedQuery::PathSearchException( + "Unsupported element in pathSearch. " + "PathQuery may only consist of triples for configuration " + "and a { group graph pattern } specifying edges."); + } + }; + + parsedQuery::GraphPattern graphPattern = visit(ctx->groupGraphPattern()); + parsedQuery::PathQuery pathQuery; + for (const auto& op : graphPattern._graphPatterns) { + parsePathQuery(pathQuery, op); + } + + return pathQuery; +} + // Parsing for the `serviceGraphPattern` rule. -parsedQuery::Service Visitor::visit(Parser::ServiceGraphPatternContext* ctx) { +GraphPatternOperation Visitor::visit(Parser::ServiceGraphPatternContext* ctx) { // Get the IRI and if a variable is specified, report that we do not support // it yet. // @@ -728,6 +754,10 @@ parsedQuery::Service Visitor::visit(Parser::ServiceGraphPatternContext* ctx) { auto serviceIri = TripleComponent::Iri::fromIriref(std::get(varOrIri).iri()); + if (serviceIri.toStringRepresentation() == + "") { + return visitPathQuery(ctx); + } // Parse the body of the SERVICE query. Add the visible variables from the // SERVICE clause to the visible variables so far, but also remember them // separately (with duplicates removed) because we need them in `Service.cpp` @@ -743,9 +773,10 @@ parsedQuery::Service Visitor::visit(Parser::ServiceGraphPatternContext* ctx) { visibleVariablesServiceQuery.begin(), visibleVariablesServiceQuery.end()); // Create suitable `parsedQuery::Service` object and return it. 
- return {std::move(visibleVariablesServiceQuery), std::move(serviceIri), - prologueString_, getOriginalInputForContext(ctx->groupGraphPattern()), - static_cast(ctx->SILENT())}; + return parsedQuery::Service{ + std::move(visibleVariablesServiceQuery), std::move(serviceIri), + prologueString_, getOriginalInputForContext(ctx->groupGraphPattern()), + static_cast(ctx->SILENT())}; } // ____________________________________________________________________________ diff --git a/src/parser/sparqlParser/SparqlQleverVisitor.h b/src/parser/sparqlParser/SparqlQleverVisitor.h index e68087b056..aa6ec1e0c9 100644 --- a/src/parser/sparqlParser/SparqlQleverVisitor.h +++ b/src/parser/sparqlParser/SparqlQleverVisitor.h @@ -254,7 +254,11 @@ class SparqlQleverVisitor { parsedQuery::GraphPatternOperation visit( Parser::GraphGraphPatternContext* ctx); - parsedQuery::Service visit(Parser::ServiceGraphPatternContext* ctx); + parsedQuery::GraphPatternOperation visit( + Parser::ServiceGraphPatternContext* ctx); + + parsedQuery::GraphPatternOperation visitPathQuery( + Parser::ServiceGraphPatternContext* ctx); parsedQuery::GraphPatternOperation visit(Parser::BindContext* ctx); diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index bcbee2b48e..eaf4b037de 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -161,6 +161,8 @@ addLinkAndDiscoverTest(IdTableTest util) addLinkAndDiscoverTest(TransitivePathTest engine) +addLinkAndDiscoverTest(PathSearchTest engine) + addLinkAndDiscoverTest(BatchedPipelineTest) addLinkAndDiscoverTest(TupleHelpersTest) diff --git a/test/PathSearchTest.cpp b/test/PathSearchTest.cpp new file mode 100644 index 0000000000..da8bd31c94 --- /dev/null +++ b/test/PathSearchTest.cpp @@ -0,0 +1,722 @@ +// Copyright 2024, University of Freiburg, +// Chair of Algorithms and Data Structures. 
+// Author: Johannes Herrmann (johannes.r.herrmann(at)gmail.com) + +#include + +#include "engine/PathSearch.h" +#include "engine/QueryExecutionTree.h" +#include "engine/Result.h" +#include "engine/ValuesForTesting.h" +#include "gmock/gmock.h" +#include "util/IdTableHelpers.h" +#include "util/IdTestHelpers.h" +#include "util/IndexTestHelpers.h" + +using ad_utility::testing::getQec; +namespace { +auto V = ad_utility::testing::VocabId; +auto I = ad_utility::testing::IntId; +using Var = Variable; +using Vars = std::vector>; + +} // namespace + +Result performPathSearch(PathSearchConfiguration config, IdTable input, + Vars vars) { + auto qec = getQec(); + auto subtree = ad_utility::makeExecutionTree( + qec, std::move(input), vars); + PathSearch p = PathSearch(qec, std::move(subtree), std::move(config)); + + return p.computeResult(false);  // requestLaziness = false +} + +TEST(PathSearchTest, constructor) { + auto qec = getQec(); + Vars vars = {Variable{"?start"}, Variable{"?end"}}; + auto sub = makeIdTableFromVector({}); + sub.setNumColumns(2); + auto subtree = ad_utility::makeExecutionTree( + qec, std::move(sub), vars); + + std::vector sources{V(0)}; + std::vector targets{V(1)}; + PathSearchConfiguration config{PathSearchAlgorithm::ALL_PATHS, + sources, + targets, + Var{"?start"}, + Var{"?end"}, + Var{"?edgeIndex"}, + Var{"?pathIndex"}, + {}}; + PathSearch p = PathSearch(qec, std::move(subtree), config); +} + +TEST(PathSearchTest, emptyGraph) { + auto sub = makeIdTableFromVector({}); + sub.setNumColumns(2); + auto expected = makeIdTableFromVector({}); + expected.setNumColumns(4); + + std::vector sources{V(0)}; + std::vector targets{V(4)}; + Vars vars = {Variable{"?start"}, Variable{"?end"}}; + PathSearchConfiguration config{PathSearchAlgorithm::ALL_PATHS, + sources, + targets, + Var{"?start"}, + Var{"?end"}, + Var{"?edgeIndex"}, + Var{"?pathIndex"}, + {}}; + + auto resultTable = performPathSearch(config, std::move(sub), vars); + ASSERT_THAT(resultTable.idTable(), +
::testing::UnorderedElementsAreArray(expected)); +} + +/** + * Graph (a simple chain; exactly one path leads from 0 to 4): + * 0 -> 1 -> 2 -> 3 -> 4 + */ +TEST(PathSearchTest, singlePath) { + auto sub = makeIdTableFromVector({{0, 1}, {1, 2}, {2, 3}, {3, 4}}); + auto expected = makeIdTableFromVector({ + {V(0), V(1), I(0), I(0)}, + {V(1), V(2), I(0), I(1)}, + {V(2), V(3), I(0), I(2)}, + {V(3), V(4), I(0), I(3)}, + }); + + std::vector sources{V(0)}; + std::vector targets{V(4)}; + Vars vars = {Variable{"?start"}, Variable{"?end"}}; + PathSearchConfiguration config{PathSearchAlgorithm::ALL_PATHS, + sources, + targets, + Var{"?start"}, + Var{"?end"}, + Var{"?edgeIndex"}, + Var{"?pathIndex"}, + {}}; + + auto resultTable = performPathSearch(config, std::move(sub), vars); + ASSERT_THAT(resultTable.idTable(), + ::testing::UnorderedElementsAreArray(expected)); +} + +TEST(PathSearchTest, singlePathWithProperties) { + auto sub = + makeIdTableFromVector({{0, 1, 10}, {1, 2, 20}, {2, 3, 30}, {3, 4, 40}}); + auto expected = makeIdTableFromVector({ + {V(0), V(1), I(0), I(0), V(10)}, + {V(1), V(2), I(0), I(1), V(20)}, + {V(2), V(3), I(0), I(2), V(30)}, + {V(3), V(4), I(0), I(3), V(40)}, + }); + + std::vector sources{V(0)}; + std::vector targets{V(4)}; + Vars vars = {Variable{"?start"}, Variable{"?end"}, Variable{"?edgeProperty"}}; + PathSearchConfiguration config{PathSearchAlgorithm::ALL_PATHS, + sources, + targets, + Var{"?start"}, + Var{"?end"}, + Var{"?edgeIndex"}, + Var{"?pathIndex"}, + {Var{"?edgeProperty"}}}; + + auto resultTable = performPathSearch(config, std::move(sub), vars); + ASSERT_THAT(resultTable.idTable(), + ::testing::UnorderedElementsAreArray(expected)); +} + +TEST(PathSearchTest, singlePathAllSources) { + auto sub = makeIdTableFromVector({{0, 1}, {1, 2}, {2, 3}, {3, 4}}); + auto expected = makeIdTableFromVector({ + {V(0), V(1), I(0), I(0), V(0)}, + {V(1), V(2), I(0), I(1), V(0)}, + {V(2), V(3), I(0), I(2), V(0)}, + {V(3), V(4), I(0), I(3), V(0)}, + {V(1), V(2), I(1), I(0), V(1)}, + {V(2), V(3), I(1), I(1), V(1)}, +
{V(3), V(4), I(1), I(2), V(1)}, + {V(2), V(3), I(2), I(0), V(2)}, + {V(3), V(4), I(2), I(1), V(2)}, + {V(3), V(4), I(3), I(0), V(3)}, + }); + + std::vector targets{V(4)}; + Vars vars = {Variable{"?start"}, Variable{"?end"}}; + PathSearchConfiguration config{PathSearchAlgorithm::ALL_PATHS, + Var{"?sources"}, + targets, + Var{"?start"}, + Var{"?end"}, + Var{"?edgeIndex"}, + Var{"?pathIndex"}, + {}}; + + auto resultTable = performPathSearch(config, std::move(sub), vars); + ASSERT_THAT(resultTable.idTable(), + ::testing::UnorderedElementsAreArray(expected)); +} + +TEST(PathSearchTest, singlePathAllTargets) { + auto sub = makeIdTableFromVector({{0, 1}, {1, 2}, {2, 3}, {3, 4}}); + auto expected = makeIdTableFromVector({ + {V(0), V(1), I(0), I(0), V(1)}, + {V(0), V(1), I(1), I(0), V(2)}, + {V(1), V(2), I(1), I(1), V(2)}, + {V(0), V(1), I(2), I(0), V(3)}, + {V(1), V(2), I(2), I(1), V(3)}, + {V(2), V(3), I(2), I(2), V(3)}, + {V(0), V(1), I(3), I(0), V(4)}, + {V(1), V(2), I(3), I(1), V(4)}, + {V(2), V(3), I(3), I(2), V(4)}, + {V(3), V(4), I(3), I(3), V(4)}, + }); + + std::vector sources{V(0)}; + Vars vars = {Variable{"?start"}, Variable{"?end"}}; + PathSearchConfiguration config{PathSearchAlgorithm::ALL_PATHS, + sources, + Var{"?targets"}, + Var{"?start"}, + Var{"?end"}, + Var{"?edgeIndex"}, + Var{"?pathIndex"}, + {}}; + + auto resultTable = performPathSearch(config, std::move(sub), vars); + ASSERT_THAT(resultTable.idTable(), + ::testing::UnorderedElementsAreArray(expected)); +} + +/** + * Graph (two branches, 0->1->2 and 0->3->2, that meet in node 2): + * 0 + * / \ + * 1 < > 3 + * \ / + * > 2 < + */ +TEST(PathSearchTest, twoPathsOneTarget) { + auto sub = makeIdTableFromVector({{0, 1}, {1, 2}, {0, 3}, {3, 2}}); + auto expected = makeIdTableFromVector({ + {V(0), V(3), I(0), I(0)}, + {V(3), V(2), I(0), I(1)}, + {V(0), V(1), I(1), I(0)}, + {V(1), V(2), I(1), I(1)}, + }); + + std::vector sources{V(0)}; + std::vector targets{V(2)}; + Vars vars = {Variable{"?start"}, Variable{"?end"}}; + PathSearchConfiguration
config{PathSearchAlgorithm::ALL_PATHS, + sources, + targets, + Var{"?start"}, + Var{"?end"}, + Var{"?edgeIndex"}, + Var{"?pathIndex"}, + {}}; + + auto resultTable = performPathSearch(config, std::move(sub), vars); + ASSERT_THAT(resultTable.idTable(), + ::testing::UnorderedElementsAreArray(expected)); +} + +/** + * Graph (two branches, 0->1->2 and 0->3->4, with separate targets): + * 0 + * / \ + * 1 < > 3 + * / \ + * 2 < > 4 + */ +TEST(PathSearchTest, twoPathsTwoTargets) { + auto sub = makeIdTableFromVector({{0, 1}, {1, 2}, {0, 3}, {3, 4}}); + auto expected = makeIdTableFromVector({ + {V(0), V(3), I(0), I(0)}, + {V(3), V(4), I(0), I(1)}, + {V(0), V(1), I(1), I(0)}, + {V(1), V(2), I(1), I(1)}, + }); + + std::vector sources{V(0)}; + std::vector targets{V(2), V(4)}; + Vars vars = {Variable{"?start"}, Variable{"?end"}}; + PathSearchConfiguration config{PathSearchAlgorithm::ALL_PATHS, + sources, + targets, + Var{"?start"}, + Var{"?end"}, + Var{"?edgeIndex"}, + Var{"?pathIndex"}, + {}}; + + auto resultTable = performPathSearch(config, std::move(sub), vars); + ASSERT_THAT(resultTable.idTable(), + ::testing::UnorderedElementsAreArray(expected)); +} + +/** + * Graph (one cycle 0->1->2->0; the expected paths never return to node 0): + * + * 2<---1 + * \ ^ + * \ | + * > 0 + */ +TEST(PathSearchTest, cycle) { + auto sub = makeIdTableFromVector({{0, 1}, {1, 2}, {2, 0}}); + auto expected = makeIdTableFromVector({ + {V(0), V(1), I(0), I(0), V(1)}, + {V(0), V(1), I(1), I(0), V(2)}, + {V(1), V(2), I(1), I(1), V(2)}, + }); + + std::vector sources{V(0)}; + Vars vars = {Variable{"?start"}, Variable{"?end"}}; + PathSearchConfiguration config{PathSearchAlgorithm::ALL_PATHS, + sources, + Var{"?targets"}, + Var{"?start"}, + Var{"?end"}, + Var{"?edgeIndex"}, + Var{"?pathIndex"}, + {}}; + + auto resultTable = performPathSearch(config, std::move(sub), vars); + ASSERT_THAT(resultTable.idTable(), + ::testing::UnorderedElementsAreArray(expected)); +} + +/** + * Graph (two cycles through 0: 0->1->2->0 and 0->1->3->0): + * + * 2<---1--->3 + * \ ^ / + * \ | / + * > 0 < + */ +TEST(PathSearchTest, twoCycle) { + auto sub = makeIdTableFromVector({{0, 1}, {1, 2}, {2, 0}, {1,
3}, {3, 0}}); + auto expected = makeIdTableFromVector({{V(0), V(1), I(0), I(0), V(1)}, + {V(0), V(1), I(1), I(0), V(3)}, + {V(1), V(3), I(1), I(1), V(3)}, + {V(0), V(1), I(2), I(0), V(2)}, + {V(1), V(2), I(2), I(1), V(2)}}); + + std::vector sources{V(0)}; + Vars vars = {Variable{"?start"}, Variable{"?end"}}; + PathSearchConfiguration config{PathSearchAlgorithm::ALL_PATHS, + sources, + Var{"?targets"}, + Var{"?start"}, + Var{"?end"}, + Var{"?edgeIndex"}, + Var{"?pathIndex"}, + {}}; + + auto resultTable = performPathSearch(config, std::move(sub), vars); + ASSERT_THAT(resultTable.idTable(), + ::testing::UnorderedElementsAreArray(expected)); +} + +/** + * Graph (edges point downwards): + * + * 0 + * / \ + * 1 2 + * \ / \ + * 3 4 + */ +TEST(PathSearchTest, allPaths) { + auto sub = makeIdTableFromVector({{0, 1}, {0, 2}, {1, 3}, {2, 3}, {2, 4}}); + auto expected = makeIdTableFromVector({ + {V(0), V(2), I(0), I(0), V(2)}, + {V(0), V(2), I(1), I(0), V(4)}, + {V(2), V(4), I(1), I(1), V(4)}, + {V(0), V(2), I(2), I(0), V(3)}, + {V(2), V(3), I(2), I(1), V(3)}, + {V(0), V(1), I(3), I(0), V(1)}, + {V(0), V(1), I(4), I(0), V(3)}, + {V(1), V(3), I(4), I(1), V(3)}, + }); + + std::vector sources{V(0)}; + Vars vars = {Variable{"?start"}, Variable{"?end"}}; + PathSearchConfiguration config{PathSearchAlgorithm::ALL_PATHS, + sources, + Var{"?targets"}, + Var{"?start"}, + Var{"?end"}, + Var{"?edgeIndex"}, + Var{"?pathIndex"}, + {}}; + + auto resultTable = performPathSearch(config, std::move(sub), vars); + ASSERT_THAT(resultTable.idTable(), + ::testing::UnorderedElementsAreArray(expected)); +} + +TEST(PathSearchTest, allPathsWithPropertiesSwitched) { + auto sub = makeIdTableFromVector({{0, 1, 10, 11}, + {1, 3, 20, 21}, + {0, 2, 30, 31}, + {2, 3, 40, 41}, + {2, 4, 50, 51}}); + auto expected = makeIdTableFromVector({ + {V(0), V(2), I(0), I(0), V(2), V(31), V(30)}, + {V(0), V(2), I(1), I(0), V(4), V(31), V(30)}, + {V(2), V(4), I(1), I(1), V(4), V(51), V(50)}, + {V(0), V(2), I(2), I(0), V(3), V(31), V(30)}, + {V(2),
V(3), I(2), I(1), V(3), V(41), V(40)}, + {V(0), V(1), I(3), I(0), V(1), V(11), V(10)}, + {V(0), V(1), I(4), I(0), V(3), V(11), V(10)}, + {V(1), V(3), I(4), I(1), V(3), V(21), V(20)}, + }); + + std::vector sources{V(0)}; + Vars vars = {Variable{"?start"}, Variable{"?end"}, Variable{"?edgeProperty1"}, + Variable{"?edgeProperty2"}}; + PathSearchConfiguration config{ + PathSearchAlgorithm::ALL_PATHS, + sources, + Var{"?targets"}, + Var{"?start"}, + Var{"?end"}, + Var{"?edgeIndex"}, + Var{"?pathIndex"}, + {Var{"?edgeProperty2"}, Var{"?edgeProperty1"}}}; + + auto resultTable = performPathSearch(config, std::move(sub), vars); + ASSERT_THAT(resultTable.idTable(), + ::testing::UnorderedElementsAreArray(expected)); +} + +/** + * Graph (edges 0->1, 0->2, 1->2 and 2->3): + * + * 0 + * |\ + * | \ + * 1->2->3 + */ +TEST(PathSearchTest, allPathsPartialAllTargets) { + auto sub = makeIdTableFromVector({{0, 1}, {1, 2}, {0, 2}, {2, 3}}); + auto expected = makeIdTableFromVector({ + {V(0), V(2), I(0), I(0), V(2)}, + {V(0), V(2), I(1), I(0), V(3)}, + {V(2), V(3), I(1), I(1), V(3)}, + {V(0), V(1), I(2), I(0), V(1)}, + {V(0), V(1), I(3), I(0), V(2)}, + {V(1), V(2), I(3), I(1), V(2)}, + {V(0), V(1), I(4), I(0), V(3)}, + {V(1), V(2), I(4), I(1), V(3)}, + {V(2), V(3), I(4), I(2), V(3)}, + }); + + std::vector sources{V(0)}; + Vars vars = {Variable{"?start"}, Variable{"?end"}}; + PathSearchConfiguration config{PathSearchAlgorithm::ALL_PATHS, + sources, + Var{"?targets"}, + Var{"?start"}, + Var{"?end"}, + Var{"?edgeIndex"}, + Var{"?pathIndex"}, + {}}; + + auto resultTable = performPathSearch(config, std::move(sub), vars); + ASSERT_THAT(resultTable.idTable(), + ::testing::UnorderedElementsAreArray(expected)); +} + +TEST(PathSearchTest, allPathsPartialAllSources) { + auto sub = makeIdTableFromVector({{0, 1}, {1, 2}, {0, 2}, {2, 3}}); + auto expected = makeIdTableFromVector({ + {V(0), V(2), I(0), I(0), V(0)}, + {V(2), V(3), I(0), I(1), V(0)}, + {V(0), V(1), I(1), I(0), V(0)}, + {V(1), V(2), I(1), I(1), V(0)}, + {V(2), V(3), I(1),
I(2), V(0)}, + {V(1), V(2), I(2), I(0), V(1)}, + {V(2), V(3), I(2), I(1), V(1)}, + {V(2), V(3), I(3), I(0), V(2)}, + }); + + std::vector targets{V(3)}; + Vars vars = {Variable{"?start"}, Variable{"?end"}}; + PathSearchConfiguration config{PathSearchAlgorithm::ALL_PATHS, + Var{"?sources"}, + targets, + Var{"?start"}, + Var{"?end"}, + Var{"?edgeIndex"}, + Var{"?pathIndex"}, + {}}; + + auto resultTable = performPathSearch(config, std::move(sub), vars); + ASSERT_THAT(resultTable.idTable(), + ::testing::UnorderedElementsAreArray(expected)); +} + +/** + * Graph (node 5 has an edge to 4 but cannot be reached from source 0): + * 0 -> 1 -> 2 -> 3 -> 4 + * ^ + * / + * 5 + */ +TEST(PathSearchTest, singlePathWithIrrelevantNode) { + auto sub = makeIdTableFromVector({{0, 1}, {1, 2}, {2, 3}, {3, 4}, {5, 4}}); + auto expected = makeIdTableFromVector({ + {V(0), V(1), I(0), I(0)}, + {V(1), V(2), I(0), I(1)}, + {V(2), V(3), I(0), I(2)}, + {V(3), V(4), I(0), I(3)}, + }); + + std::vector sources{V(0)}; + std::vector targets{V(4)}; + Vars vars = {Variable{"?start"}, Variable{"?end"}}; + PathSearchConfiguration config{PathSearchAlgorithm::ALL_PATHS, + sources, + targets, + Var{"?start"}, + Var{"?end"}, + Var{"?edgeIndex"}, + Var{"?pathIndex"}, + {}}; + + auto resultTable = performPathSearch(config, std::move(sub), vars); + ASSERT_THAT(resultTable.idTable(), + ::testing::UnorderedElementsAreArray(expected)); +} + +/** + * Graph (edges point downwards): + * 0 + * | + * 1 + * / \ + * 2 3 + * \ / + * 4 + * | + * 5 + */ +TEST(PathSearchTest, elongatedDiamond) { + auto sub = + makeIdTableFromVector({{0, 1}, {1, 2}, {1, 3}, {2, 4}, {3, 4}, {4, 5}}); + auto expected = makeIdTableFromVector({ + {V(0), V(1), I(0), I(0)}, + {V(1), V(3), I(0), I(1)}, + {V(3), V(4), I(0), I(2)}, + {V(4), V(5), I(0), I(3)}, + {V(0), V(1), I(1), I(0)}, + {V(1), V(2), I(1), I(1)}, + {V(2), V(4), I(1), I(2)}, + {V(4), V(5), I(1), I(3)}, + }); + + std::vector sources{V(0)}; + std::vector targets{V(5)}; + Vars vars = {Variable{"?start"}, Variable{"?end"}}; + PathSearchConfiguration
config{PathSearchAlgorithm::ALL_PATHS, + sources, + targets, + Var{"?start"}, + Var{"?end"}, + Var{"?edgeIndex"}, + Var{"?pathIndex"}, + {}}; + + auto resultTable = performPathSearch(config, std::move(sub), vars); + ASSERT_THAT(resultTable.idTable(), + ::testing::UnorderedElementsAreArray(expected)); +} + +/** + * Graph (edges 0->2, 1->2, 2->3, 3->4 and 3->5): + * 0 4 + * \ / + * 2-->3 + * / \ + * 1 5 + */ +TEST(PathSearchTest, multiSourceMultiTargetallPaths) { + auto sub = makeIdTableFromVector({{0, 2}, {1, 2}, {2, 3}, {3, 4}, {3, 5}}); + auto expected = makeIdTableFromVector({ + {V(0), V(2), I(0), I(0)}, + {V(2), V(3), I(0), I(1)}, + {V(3), V(5), I(0), I(2)}, + {V(0), V(2), I(1), I(0)}, + {V(2), V(3), I(1), I(1)}, + {V(3), V(4), I(1), I(2)}, + {V(1), V(2), I(2), I(0)}, + {V(2), V(3), I(2), I(1)}, + {V(3), V(5), I(2), I(2)}, + {V(1), V(2), I(3), I(0)}, + {V(2), V(3), I(3), I(1)}, + {V(3), V(4), I(3), I(2)}, + }); + + std::vector sources{V(0), V(1)}; + std::vector targets{V(4), V(5)}; + Vars vars = {Variable{"?start"}, Variable{"?end"}}; + PathSearchConfiguration config{PathSearchAlgorithm::ALL_PATHS, + sources, + targets, + Var{"?start"}, + Var{"?end"}, + Var{"?edgeIndex"}, + Var{"?pathIndex"}, + {}}; + + auto resultTable = performPathSearch(config, std::move(sub), vars); + ASSERT_THAT(resultTable.idTable(), + ::testing::UnorderedElementsAreArray(expected)); +} + +TEST(PathSearchTest, multiSourceMultiTargetallPathsNotCartesian) { + auto sub = makeIdTableFromVector({{0, 2}, {1, 2}, {2, 3}, {3, 4}, {3, 5}}); + auto expected = makeIdTableFromVector({ + {V(0), V(2), I(0), I(0)}, + {V(2), V(3), I(0), I(1)}, + {V(3), V(4), I(0), I(2)}, + {V(1), V(2), I(1), I(0)}, + {V(2), V(3), I(1), I(1)}, + {V(3), V(5), I(1), I(2)}, + }); + + std::vector sources{V(0), V(1)}; + std::vector targets{V(4), V(5)}; + Vars vars = {Variable{"?start"}, Variable{"?end"}}; + PathSearchConfiguration config{PathSearchAlgorithm::ALL_PATHS, + sources, + targets, + Var{"?start"}, + Var{"?end"}, + Var{"?edgeIndex"}, + Var{"?pathIndex"}, +
{}, + false}; + + auto resultTable = performPathSearch(config, std::move(sub), vars); + ASSERT_THAT(resultTable.idTable(), + ::testing::UnorderedElementsAreArray(expected)); +} + +TEST(PathSearchTest, sourceBound) { + auto sub = makeIdTableFromVector({{0, 1}, {1, 2}, {2, 3}, {3, 4}}); + auto sourceTable = makeIdTableFromVector({{0}}); + auto expected = makeIdTableFromVector({ + {V(0), V(1), I(0), I(0), V(0)}, + {V(1), V(2), I(0), I(1), V(0)}, + {V(2), V(3), I(0), I(2), V(0)}, + {V(3), V(4), I(0), I(3), V(0)}, + }); + + std::vector targets{V(4)}; + Vars vars = {Variable{"?start"}, Variable{"?end"}}; + PathSearchConfiguration config{PathSearchAlgorithm::ALL_PATHS, + Var{"?source"}, + targets, + Var{"?start"}, + Var{"?end"}, + Var{"?edgeIndex"}, + Var{"?pathIndex"}, + {}}; + + auto qec = getQec(); + auto subtree = ad_utility::makeExecutionTree( + qec, std::move(sub), vars); + auto pathSearch = PathSearch(qec, std::move(subtree), std::move(config)); + + Vars sourceTreeVars = {Var{"?source"}}; + auto sourceTree = ad_utility::makeExecutionTree( + qec, std::move(sourceTable), sourceTreeVars); + pathSearch.bindSourceSide(sourceTree, 0); + + auto resultTable = pathSearch.computeResult(false); + ASSERT_THAT(resultTable.idTable(), + ::testing::UnorderedElementsAreArray(expected)); +} + +TEST(PathSearchTest, targetBound) { + auto sub = makeIdTableFromVector({{0, 1}, {1, 2}, {2, 3}, {3, 4}}); + auto targetTable = makeIdTableFromVector({{4}}); + auto expected = makeIdTableFromVector({ + {V(0), V(1), I(0), I(0), V(4)}, + {V(1), V(2), I(0), I(1), V(4)}, + {V(2), V(3), I(0), I(2), V(4)}, + {V(3), V(4), I(0), I(3), V(4)}, + }); + + std::vector sources{V(0)}; + Vars vars = {Variable{"?start"}, Variable{"?end"}}; + PathSearchConfiguration config{PathSearchAlgorithm::ALL_PATHS, + sources, + Var{"?target"}, + Var{"?start"}, + Var{"?end"}, + Var{"?edgeIndex"}, + Var{"?pathIndex"}, + {}}; + + auto qec = getQec(); + auto subtree = ad_utility::makeExecutionTree( + qec, std::move(sub), 
vars); + auto pathSearch = PathSearch(qec, std::move(subtree), std::move(config)); + + Vars targetTreeVars = {Var{"?target"}}; + auto targetTree = ad_utility::makeExecutionTree( + qec, std::move(targetTable), targetTreeVars); + pathSearch.bindTargetSide(targetTree, 0); + + auto resultTable = pathSearch.computeResult(false); + ASSERT_THAT(resultTable.idTable(), + ::testing::UnorderedElementsAreArray(expected)); +} + +TEST(PathSearchTest, sourceAndTargetBound) { + auto sub = makeIdTableFromVector({{0, 1}, {1, 2}, {2, 3}, {3, 4}}); + auto sideTable = makeIdTableFromVector({{0, 4}}); + auto expected = makeIdTableFromVector({ + {V(0), V(1), I(0), I(0), V(0), V(4)}, + {V(1), V(2), I(0), I(1), V(0), V(4)}, + {V(2), V(3), I(0), I(2), V(0), V(4)}, + {V(3), V(4), I(0), I(3), V(0), V(4)}, + }); + + Vars vars = {Variable{"?start"}, Variable{"?end"}}; + PathSearchConfiguration config{PathSearchAlgorithm::ALL_PATHS, + Var{"?source"}, + Var{"?target"}, + Var{"?start"}, + Var{"?end"}, + Var{"?edgeIndex"}, + Var{"?pathIndex"}, + {}}; + + auto qec = getQec(); + auto subtree = ad_utility::makeExecutionTree( + qec, std::move(sub), vars); + auto pathSearch = PathSearch(qec, std::move(subtree), std::move(config)); + + Vars sideTreeVars = {Var{"?source"}, Var{"?target"}}; + auto sideTree = ad_utility::makeExecutionTree( + qec, std::move(sideTable), sideTreeVars); + pathSearch.bindSourceAndTargetSide(sideTree, 0, 1); + + auto resultTable = pathSearch.computeResult(false); + ASSERT_THAT(resultTable.idTable(), + ::testing::UnorderedElementsAreArray(expected)); +} diff --git a/test/QueryPlannerTest.cpp b/test/QueryPlannerTest.cpp index 955ebc88ee..5db2454f1a 100644 --- a/test/QueryPlannerTest.cpp +++ b/test/QueryPlannerTest.cpp @@ -7,7 +7,9 @@ #include "QueryPlannerTestHelpers.h" #include "engine/QueryPlanner.h" #include "engine/SpatialJoin.h" +#include "parser/GraphPatternOperation.h" #include "parser/SparqlParser.h" +#include "parser/data/Variable.h" #include 
"util/TripleComponentTestHelpers.h" namespace h = queryPlannerTestHelpers; @@ -783,6 +785,704 @@ TEST(QueryPlanner, TransitivePathBindRight) { ad_utility::testing::getQec("

.

")); } +TEST(QueryPlanner, PathSearchSingleTarget) { + auto scan = h::IndexScanFromStrings; + auto qec = ad_utility::testing::getQec("

.

"); + auto getId = ad_utility::testing::makeGetId(qec->getIndex()); + + std::vector sources{getId("")}; + std::vector targets{getId("")}; + PathSearchConfiguration config{PathSearchAlgorithm::ALL_PATHS, + sources, + targets, + Variable("?start"), + Variable("?end"), + Variable("?path"), + Variable("?edge"), + {}}; + h::expect( + "PREFIX pathSearch: " + "SELECT ?start ?end ?path ?edge WHERE {" + "SERVICE pathSearch: {" + "_:path pathSearch:algorithm pathSearch:allPaths ;" + "pathSearch:source ;" + "pathSearch:target ;" + "pathSearch:pathColumn ?path ;" + "pathSearch:edgeColumn ?edge ;" + "pathSearch:start ?start;" + "pathSearch:end ?end;" + "{SELECT * WHERE {" + "?start

?end." + "}}}}", + h::PathSearch(config, true, true, scan("?start", "

", "?end")), qec); +} + +TEST(QueryPlanner, PathSearchMultipleTargets) { + auto scan = h::IndexScanFromStrings; + auto qec = ad_utility::testing::getQec("

.

"); + auto getId = ad_utility::testing::makeGetId(qec->getIndex()); + + std::vector sources{getId("")}; + std::vector targets{getId(""), getId("")}; + PathSearchConfiguration config{PathSearchAlgorithm::ALL_PATHS, + sources, + targets, + Variable("?start"), + Variable("?end"), + Variable("?path"), + Variable("?edge"), + {}}; + h::expect( + "PREFIX pathSearch: " + "SELECT ?start ?end ?path ?edge WHERE {" + "SERVICE pathSearch: {" + "_:path pathSearch:algorithm pathSearch:allPaths ;" + "pathSearch:source ;" + "pathSearch:target ;" + "pathSearch:target ;" + "pathSearch:pathColumn ?path ;" + "pathSearch:edgeColumn ?edge ;" + "pathSearch:start ?start;" + "pathSearch:end ?end;" + "{SELECT * WHERE {" + "?start

?end." + "}}}}", + h::PathSearch(config, true, true, scan("?start", "

", "?end")), qec); +} + +TEST(QueryPlanner, PathSearchMultipleSourcesAndTargets) { + auto scan = h::IndexScanFromStrings; + auto qec = + ad_utility::testing::getQec("

.

.

"); + auto getId = ad_utility::testing::makeGetId(qec->getIndex()); + + std::vector sources{getId(""), getId("")}; + std::vector targets{getId(""), getId("")}; + PathSearchConfiguration config{PathSearchAlgorithm::ALL_PATHS, + sources, + targets, + Variable("?start"), + Variable("?end"), + Variable("?path"), + Variable("?edge"), + {}}; + h::expect( + "PREFIX pathSearch: " + "SELECT ?start ?end ?path ?edge WHERE {" + "SERVICE pathSearch: {" + "_:path pathSearch:algorithm pathSearch:allPaths ;" + "pathSearch:source ;" + "pathSearch:source ;" + "pathSearch:target ;" + "pathSearch:target ;" + "pathSearch:pathColumn ?path ;" + "pathSearch:edgeColumn ?edge ;" + "pathSearch:start ?start;" + "pathSearch:end ?end;" + "{SELECT * WHERE {" + "?start

?end." + "}}}}", + h::PathSearch(config, true, true, scan("?start", "

", "?end")), qec); +} + +TEST(QueryPlanner, PathSearchMultipleSourcesAndTargetsCartesian) { + auto scan = h::IndexScanFromStrings; + auto qec = + ad_utility::testing::getQec("

.

.

"); + auto getId = ad_utility::testing::makeGetId(qec->getIndex()); + + std::vector sources{getId(""), getId("")}; + std::vector targets{getId(""), getId("")}; + PathSearchConfiguration config{PathSearchAlgorithm::ALL_PATHS, + sources, + targets, + Variable("?start"), + Variable("?end"), + Variable("?path"), + Variable("?edge"), + {}}; + h::expect( + "PREFIX pathSearch: " + "SELECT ?start ?end ?path ?edge WHERE {" + "SERVICE pathSearch: {" + "_:path pathSearch:algorithm pathSearch:allPaths ;" + "pathSearch:source ;" + "pathSearch:source ;" + "pathSearch:target ;" + "pathSearch:target ;" + "pathSearch:pathColumn ?path ;" + "pathSearch:edgeColumn ?edge ;" + "pathSearch:start ?start;" + "pathSearch:end ?end;" + "pathSearch:cartesian true;" + "{SELECT * WHERE {" + "?start

?end." + "}}}}", + h::PathSearch(config, true, true, scan("?start", "

", "?end")), qec); +} +TEST(QueryPlanner, PathSearchMultipleSourcesAndTargetsNonCartesian) { + auto scan = h::IndexScanFromStrings; + auto qec = + ad_utility::testing::getQec("

.

.

"); + auto getId = ad_utility::testing::makeGetId(qec->getIndex()); + + std::vector sources{getId(""), getId("")}; + std::vector targets{getId(""), getId("")}; + PathSearchConfiguration config{PathSearchAlgorithm::ALL_PATHS, + sources, + targets, + Variable("?start"), + Variable("?end"), + Variable("?path"), + Variable("?edge"), + {}, + false}; + h::expect( + "PREFIX pathSearch: " + "SELECT ?start ?end ?path ?edge WHERE {" + "SERVICE pathSearch: {" + "_:path pathSearch:algorithm pathSearch:allPaths ;" + "pathSearch:source ;" + "pathSearch:source ;" + "pathSearch:target ;" + "pathSearch:target ;" + "pathSearch:pathColumn ?path ;" + "pathSearch:edgeColumn ?edge ;" + "pathSearch:start ?start;" + "pathSearch:end ?end;" + "pathSearch:cartesian false;" + "{SELECT * WHERE {" + "?start

?end." + "}}}}", + h::PathSearch(config, true, true, scan("?start", "

", "?end")), qec); +} + +TEST(QueryPlanner, PathSearchWithEdgeProperties) { + auto scan = h::IndexScanFromStrings; + auto join = h::Join; + auto qec = ad_utility::testing::getQec( + " . . . "); + auto getId = ad_utility::testing::makeGetId(qec->getIndex()); + + std::vector sources{getId("")}; + std::vector targets{getId("")}; + PathSearchConfiguration config{PathSearchAlgorithm::ALL_PATHS, + sources, + targets, + Variable("?start"), + Variable("?end"), + Variable("?path"), + Variable("?edge"), + {Variable("?middle")}}; + h::expect( + "PREFIX pathSearch: " + "SELECT ?start ?end ?path ?edge WHERE {" + "SERVICE pathSearch: {" + "_:path pathSearch:algorithm pathSearch:allPaths ;" + "pathSearch:source ;" + "pathSearch:target ;" + "pathSearch:pathColumn ?path ;" + "pathSearch:edgeColumn ?edge ;" + "pathSearch:start ?start;" + "pathSearch:end ?end;" + "pathSearch:edgeProperty ?middle;" + "{SELECT * WHERE {" + "?start ?middle." + "?middle ?end." + "}}}}", + h::PathSearch(config, true, true, + h::Sort(join(scan("?start", "", "?middle"), + scan("?middle", "", "?end")))), + qec); +} + +TEST(QueryPlanner, PathSearchWithMultipleEdgePropertiesAndTargets) { + auto scan = h::IndexScanFromStrings; + auto join = h::UnorderedJoins; + auto qec = ad_utility::testing::getQec( + " ." + " ." + " ." + " ." + " ." 
+ " "); + auto getId = ad_utility::testing::makeGetId(qec->getIndex()); + + std::vector sources{getId("")}; + std::vector targets{getId(""), getId("")}; + PathSearchConfiguration config{ + PathSearchAlgorithm::ALL_PATHS, + sources, + targets, + Variable("?start"), + Variable("?end"), + Variable("?path"), + Variable("?edge"), + {Variable("?middle"), Variable("?middleAttribute")}}; + h::expect( + "PREFIX pathSearch: " + "SELECT ?start ?end ?path ?edge WHERE {" + "SERVICE pathSearch: {" + "_:path pathSearch:algorithm pathSearch:allPaths ;" + "pathSearch:source ;" + "pathSearch:target ;" + "pathSearch:target ;" + "pathSearch:pathColumn ?path ;" + "pathSearch:edgeColumn ?edge ;" + "pathSearch:start ?start;" + "pathSearch:end ?end;" + "pathSearch:edgeProperty ?middle;" + "pathSearch:edgeProperty ?middleAttribute;" + "{SELECT * WHERE {" + "?start ?middle." + "?middle ?middleAttribute." + "?middle ?end." + "}}}}", + h::PathSearch(config, true, true, + h::Sort(join(scan("?start", "", "?middle"), + scan("?middle", "", "?middleAttribute"), + scan("?middle", "", "?end")))), + qec); +} + +TEST(QueryPlanner, PathSearchJoinOnEdgeProperty) { + auto scan = h::IndexScanFromStrings; + auto join = h::Join; + auto qec = ad_utility::testing::getQec( + " . . . 
"); + auto getId = ad_utility::testing::makeGetId(qec->getIndex()); + + std::vector sources{getId("")}; + std::vector targets{getId("")}; + PathSearchConfiguration config{PathSearchAlgorithm::ALL_PATHS, + sources, + targets, + Variable("?start"), + Variable("?end"), + Variable("?path"), + Variable("?edge"), + {Variable("?middle")}}; + h::expect( + "PREFIX pathSearch: " + "SELECT ?start ?end ?path ?edge WHERE {" + "VALUES ?middle {} " + "SERVICE pathSearch: {" + "_:path pathSearch:algorithm pathSearch:allPaths ;" + "pathSearch:source ;" + "pathSearch:target ;" + "pathSearch:pathColumn ?path ;" + "pathSearch:edgeColumn ?edge ;" + "pathSearch:start ?start;" + "pathSearch:end ?end;" + "pathSearch:edgeProperty ?middle;" + "{SELECT * WHERE {" + "?start ?middle." + "?middle ?end." + "}}}}", + join(h::Sort(h::ValuesClause("VALUES (?middle) { () }")), + h::Sort( + h::PathSearch(config, true, true, + h::Sort(join(scan("?start", "", "?middle"), + scan("?middle", "", "?end")))))), + qec); +} + +TEST(QueryPlanner, PathSearchSourceBound) { + auto scan = h::IndexScanFromStrings; + auto qec = ad_utility::testing::getQec("

.

"); + auto getId = ad_utility::testing::makeGetId(qec->getIndex()); + + Variable sources{"?source"}; + std::vector targets{getId("")}; + PathSearchConfiguration config{PathSearchAlgorithm::ALL_PATHS, + sources, + targets, + Variable("?start"), + Variable("?end"), + Variable("?path"), + Variable("?edge"), + {}}; + h::expect( + "PREFIX pathSearch: " + "SELECT ?start ?end ?path ?edge WHERE {" + "VALUES ?source {}" + "SERVICE pathSearch: {" + "_:path pathSearch:algorithm pathSearch:allPaths ;" + "pathSearch:source ?source ;" + "pathSearch:target ;" + "pathSearch:pathColumn ?path ;" + "pathSearch:edgeColumn ?edge ;" + "pathSearch:start ?start;" + "pathSearch:end ?end;" + "{SELECT * WHERE {" + "?start

?end." + "}}}}", + h::PathSearch(config, true, true, scan("?start", "

", "?end"), + h::ValuesClause("VALUES (?source) { () }")), + qec); +} + +TEST(QueryPlanner, PathSearchTargetBound) { + auto scan = h::IndexScanFromStrings; + auto qec = ad_utility::testing::getQec("

.

"); + auto getId = ad_utility::testing::makeGetId(qec->getIndex()); + + std::vector sources{getId("")}; + Variable targets{"?target"}; + PathSearchConfiguration config{PathSearchAlgorithm::ALL_PATHS, + sources, + targets, + Variable("?start"), + Variable("?end"), + Variable("?path"), + Variable("?edge"), + {}}; + h::expect( + "PREFIX pathSearch: " + "SELECT ?start ?end ?path ?edge WHERE {" + "VALUES ?target {}" + "SERVICE pathSearch: {" + "_:path pathSearch:algorithm pathSearch:allPaths ;" + "pathSearch:source ;" + "pathSearch:target ?target ;" + "pathSearch:pathColumn ?path ;" + "pathSearch:edgeColumn ?edge ;" + "pathSearch:start ?start;" + "pathSearch:end ?end;" + "{SELECT * WHERE {" + "?start

?end." + "}}}}", + h::PathSearch(config, true, true, scan("?start", "

", "?end"), + h::ValuesClause("VALUES (?target) { () }")), + qec); +} + +TEST(QueryPlanner, PathSearchBothBound) { + auto scan = h::IndexScanFromStrings; + auto qec = ad_utility::testing::getQec("

.

"); + auto getId = ad_utility::testing::makeGetId(qec->getIndex()); + + Variable sources{"?source"}; + Variable targets{"?target"}; + PathSearchConfiguration config{PathSearchAlgorithm::ALL_PATHS, + sources, + targets, + Variable("?start"), + Variable("?end"), + Variable("?path"), + Variable("?edge"), + {}}; + h::expect( + "PREFIX pathSearch: " + "SELECT ?start ?end ?path ?edge WHERE {" + "VALUES (?source ?target) {( )}" + "SERVICE pathSearch: {" + "_:path pathSearch:algorithm pathSearch:allPaths ;" + "pathSearch:source ?source ;" + "pathSearch:target ?target ;" + "pathSearch:pathColumn ?path ;" + "pathSearch:edgeColumn ?edge ;" + "pathSearch:start ?start;" + "pathSearch:end ?end;" + "{SELECT * WHERE {" + "?start

?end." + "}}}}", + h::PathSearch(config, true, true, scan("?start", "

", "?end"), + h::ValuesClause("VALUES (?source\t?target) { ( ) }")), + qec); +} + +TEST(QueryPlanner, PathSearchBothBoundIndividually) { + auto scan = h::IndexScanFromStrings; + auto qec = ad_utility::testing::getQec("

.

"); + auto getId = ad_utility::testing::makeGetId(qec->getIndex()); + + Variable sources{"?source"}; + Variable targets{"?target"}; + PathSearchConfiguration config{PathSearchAlgorithm::ALL_PATHS, + sources, + targets, + Variable("?start"), + Variable("?end"), + Variable("?path"), + Variable("?edge"), + {}}; + h::expect( + "PREFIX pathSearch: " + "SELECT ?start ?end ?path ?edge WHERE {" + "VALUES (?source) {()}" + "VALUES (?target) {()}" + "SERVICE pathSearch: {" + "_:path pathSearch:algorithm pathSearch:allPaths ;" + "pathSearch:source ?source ;" + "pathSearch:target ?target ;" + "pathSearch:pathColumn ?path ;" + "pathSearch:edgeColumn ?edge ;" + "pathSearch:start ?start;" + "pathSearch:end ?end;" + "{SELECT * WHERE {" + "?start

?end." + "}}}}", + h::PathSearch(config, true, true, scan("?start", "

", "?end"), + h::ValuesClause("VALUES (?source) { () }"), + h::ValuesClause("VALUES (?target) { () }")), + qec); +} + +// __________________________________________________________________________ +TEST(QueryPlanner, PathSearchMissingStart) { + auto qec = ad_utility::testing::getQec("

.

"); + auto getId = ad_utility::testing::makeGetId(qec->getIndex()); + + auto query = + "PREFIX pathSearch: " + "SELECT ?start ?end ?path ?edge WHERE {" + "SERVICE pathSearch: {" + "_:path pathSearch:algorithm pathSearch:allPaths ;" + "pathSearch:source ;" + "pathSearch:target ;" + "pathSearch:pathColumn ?path ;" + "pathSearch:edgeColumn ?edge ;" + "pathSearch:end ?end;" + "{SELECT * WHERE {" + "?start

?end." + "}}}}"; + AD_EXPECT_THROW_WITH_MESSAGE_AND_TYPE(h::parseAndPlan(std::move(query), qec), + HasSubstr("Missing parameter 'start'"), + parsedQuery::PathSearchException); +} + +// __________________________________________________________________________ +TEST(QueryPlanner, PathSearchMultipleStarts) { + auto qec = ad_utility::testing::getQec("

.

"); + auto getId = ad_utility::testing::makeGetId(qec->getIndex()); + + auto query = + "PREFIX pathSearch: " + "SELECT ?start ?end ?path ?edge WHERE {" + "SERVICE pathSearch: {" + "_:path pathSearch:algorithm pathSearch:allPaths ;" + "pathSearch:source ;" + "pathSearch:target ;" + "pathSearch:pathColumn ?path ;" + "pathSearch:edgeColumn ?edge ;" + "pathSearch:start ?start1;" + "pathSearch:start ?start2;" + "pathSearch:end ?end;" + "{SELECT * WHERE {" + "?start

?end." + "}}}}"; + AD_EXPECT_THROW_WITH_MESSAGE_AND_TYPE( + h::parseAndPlan(std::move(query), qec), + HasSubstr("parameter 'start' has already been set " + "to variable: '?start1'. New variable: '?start2'"), + parsedQuery::PathSearchException); +} + +// __________________________________________________________________________ +TEST(QueryPlanner, PathSearchMissingEnd) { + auto qec = ad_utility::testing::getQec("

.

"); + auto getId = ad_utility::testing::makeGetId(qec->getIndex()); + + auto query = + "PREFIX pathSearch: " + "SELECT ?start ?end ?path ?edge WHERE {" + "SERVICE pathSearch: {" + "_:path pathSearch:algorithm pathSearch:allPaths ;" + "pathSearch:source ;" + "pathSearch:target ;" + "pathSearch:pathColumn ?path ;" + "pathSearch:edgeColumn ?edge ;" + "pathSearch:start ?start;" + "{SELECT * WHERE {" + "?start

?end." + "}}}}"; + AD_EXPECT_THROW_WITH_MESSAGE_AND_TYPE(h::parseAndPlan(std::move(query), qec), + HasSubstr("Missing parameter 'end'"), + parsedQuery::PathSearchException); +} + +// __________________________________________________________________________ +TEST(QueryPlanner, PathSearchMultipleEnds) { + auto qec = ad_utility::testing::getQec("

.

"); + auto getId = ad_utility::testing::makeGetId(qec->getIndex()); + + auto query = + "PREFIX pathSearch: " + "SELECT ?start ?end ?path ?edge WHERE {" + "SERVICE pathSearch: {" + "_:path pathSearch:algorithm pathSearch:allPaths ;" + "pathSearch:source ;" + "pathSearch:target ;" + "pathSearch:pathColumn ?path ;" + "pathSearch:edgeColumn ?edge ;" + "pathSearch:start ?start;" + "pathSearch:end ?end1;" + "pathSearch:end ?end2;" + "{SELECT * WHERE {" + "?start

?end." + "}}}}"; + AD_EXPECT_THROW_WITH_MESSAGE_AND_TYPE( + h::parseAndPlan(std::move(query), qec), + HasSubstr("parameter 'end' has already been set " + "to variable: '?end1'. New variable: '?end2'"), + parsedQuery::PathSearchException); +} + +// __________________________________________________________________________ +TEST(QueryPlanner, PathSearchStartNotVariable) { + auto qec = ad_utility::testing::getQec("

.

"); + auto getId = ad_utility::testing::makeGetId(qec->getIndex()); + + auto query = + "PREFIX pathSearch: " + "SELECT ?start ?end ?path ?edge WHERE {" + "SERVICE pathSearch: {" + "_:path pathSearch:algorithm pathSearch:allPaths ;" + "pathSearch:source ;" + "pathSearch:target ;" + "pathSearch:pathColumn ?path ;" + "pathSearch:edgeColumn ?edge ;" + "pathSearch:start ;" + "pathSearch:end ?end;" + "{SELECT * WHERE {" + "?start

?end." + "}}}}"; + AD_EXPECT_THROW_WITH_MESSAGE_AND_TYPE( + h::parseAndPlan(std::move(query), qec), + HasSubstr("The value for parameter 'start'"), + parsedQuery::PathSearchException); +} + +// __________________________________________________________________________ +TEST(QueryPlanner, PathSearchPredicateNotIri) { + auto qec = ad_utility::testing::getQec("

.

"); + auto getId = ad_utility::testing::makeGetId(qec->getIndex()); + + auto query = + "PREFIX pathSearch: " + "SELECT ?start ?end ?path ?edge WHERE {" + "SERVICE pathSearch: {" + "_:path ?algorithm pathSearch:allPaths ;" + "pathSearch:source ;" + "pathSearch:target ;" + "pathSearch:pathColumn ?path ;" + "pathSearch:edgeColumn ?edge ;" + "pathSearch:start ?start;" + "pathSearch:end ?end;" + "{SELECT * WHERE {" + "?start

?end." + "}}}}"; + AD_EXPECT_THROW_WITH_MESSAGE_AND_TYPE(h::parseAndPlan(std::move(query), qec), + HasSubstr("Predicates must be IRIs"), + parsedQuery::PathSearchException); +} + +// __________________________________________________________________________ +TEST(QueryPlanner, PathSearchUnsupportedArgument) { + auto qec = ad_utility::testing::getQec("

.

"); + auto getId = ad_utility::testing::makeGetId(qec->getIndex()); + + auto query = + "PREFIX pathSearch: " + "SELECT ?start ?end ?path ?edge WHERE {" + "SERVICE pathSearch: {" + "_:path pathSearch:algorithm pathSearch:allPaths ;" + " ?error;" + "pathSearch:source ;" + "pathSearch:target ;" + "pathSearch:pathColumn ?path ;" + "pathSearch:edgeColumn ?edge ;" + "pathSearch:start ?start;" + "pathSearch:end ?end;" + "{SELECT * WHERE {" + "?start

?end." + "}}}}"; + AD_EXPECT_THROW_WITH_MESSAGE_AND_TYPE( + h::parseAndPlan(std::move(query), qec), + HasSubstr("Unsupported argument in PathSearch"), + parsedQuery::PathSearchException); +} + +// __________________________________________________________________________ +TEST(QueryPlanner, PathSearchTwoVariablesForSource) { + auto qec = ad_utility::testing::getQec("

.

"); + auto getId = ad_utility::testing::makeGetId(qec->getIndex()); + + auto query = + "PREFIX pathSearch: " + "SELECT ?start ?end ?path ?edge WHERE {" + "SERVICE pathSearch: {" + "_:path pathSearch:algorithm pathSearch:allPaths ;" + "pathSearch:source ?source1 ;" + "pathSearch:source ?source2 ;" + "pathSearch:target ;" + "pathSearch:pathColumn ?path ;" + "pathSearch:edgeColumn ?edge ;" + "pathSearch:start ?start;" + "pathSearch:end ?end;" + "{SELECT * WHERE {" + "?start

?end." + "}}}}"; + AD_EXPECT_THROW_WITH_MESSAGE_AND_TYPE( + h::parseAndPlan(std::move(query), qec), + HasSubstr("Only one variable is allowed per search side"), + parsedQuery::PathSearchException); +} + +// __________________________________________________________________________ +TEST(QueryPlanner, PathSearchUnsupportedElement) { + auto qec = ad_utility::testing::getQec("

.

"); + auto getId = ad_utility::testing::makeGetId(qec->getIndex()); + + auto query = + "PREFIX pathSearch: " + "SELECT ?start ?end ?path ?edge WHERE {" + "SERVICE pathSearch: {" + "_:path pathSearch:algorithm pathSearch:allPaths ;" + "pathSearch:source ?source1 ;" + "pathSearch:source ?source2 ;" + "pathSearch:target ;" + "pathSearch:pathColumn ?path ;" + "pathSearch:edgeColumn ?edge ;" + "pathSearch:start ?start;" + "pathSearch:end ?end;" + "VALUES ?middle {}" + "{SELECT * WHERE {" + "?start

?end." + "}}}}"; + AD_EXPECT_THROW_WITH_MESSAGE_AND_TYPE( + h::parseAndPlan(std::move(query), qec), + HasSubstr("Unsupported element in pathSearch"), + parsedQuery::PathSearchException); +} + +// __________________________________________________________________________ +TEST(QueryPlanner, PathSearchUnsupportedAlgorithm) { + auto qec = ad_utility::testing::getQec("

.

"); + auto getId = ad_utility::testing::makeGetId(qec->getIndex()); + + auto query = + "PREFIX pathSearch: " + "SELECT ?start ?end ?path ?edge WHERE {" + "SERVICE pathSearch: {" + "_:path pathSearch:algorithm pathSearch:shortestPath ;" + "pathSearch:source ?source1 ;" + "pathSearch:source ?source2 ;" + "pathSearch:target ;" + "pathSearch:pathColumn ?path ;" + "pathSearch:edgeColumn ?edge ;" + "pathSearch:start ?start;" + "pathSearch:end ?end;" + "{SELECT * WHERE {" + "?start

?end." + "}}}}"; + AD_EXPECT_THROW_WITH_MESSAGE_AND_TYPE( + h::parseAndPlan(std::move(query), qec), + HasSubstr("Unsupported algorithm in pathSearch"), + parsedQuery::PathSearchException); +} + TEST(QueryPlanner, SpatialJoinViaMaxDistPredicate) { auto scan = h::IndexScanFromStrings; h::expect( diff --git a/test/QueryPlannerTestHelpers.h b/test/QueryPlannerTestHelpers.h index 17622ee5eb..2457284c87 100644 --- a/test/QueryPlannerTestHelpers.h +++ b/test/QueryPlannerTestHelpers.h @@ -8,6 +8,7 @@ #include #include +#include #include "./util/GTestHelpers.h" #include "engine/Bind.h" @@ -22,6 +23,7 @@ #include "engine/NeutralElementOperation.h" #include "engine/OptionalJoin.h" #include "engine/OrderBy.h" +#include "engine/PathSearch.h" #include "engine/QueryExecutionTree.h" #include "engine/QueryPlanner.h" #include "engine/Service.h" @@ -292,6 +294,38 @@ inline auto TransitivePath = TransitivePathSideMatcher(right)))); }; +inline auto PathSearchConfigMatcher = [](PathSearchConfiguration config) { + auto sourceMatcher = + AD_FIELD(PathSearchConfiguration, sources_, Eq(config.sources_)); + auto targetMatcher = + AD_FIELD(PathSearchConfiguration, targets_, Eq(config.targets_)); + return AllOf( + AD_FIELD(PathSearchConfiguration, algorithm_, Eq(config.algorithm_)), + sourceMatcher, targetMatcher, + AD_FIELD(PathSearchConfiguration, start_, Eq(config.start_)), + AD_FIELD(PathSearchConfiguration, end_, Eq(config.end_)), + AD_FIELD(PathSearchConfiguration, pathColumn_, Eq(config.pathColumn_)), + AD_FIELD(PathSearchConfiguration, edgeColumn_, Eq(config.edgeColumn_)), + AD_FIELD(PathSearchConfiguration, edgeProperties_, + UnorderedElementsAreArray(config.edgeProperties_))); +}; + +// Match a PathSearch operation +inline auto PathSearch = + [](PathSearchConfiguration config, bool sourceBound, bool targetBound, + const std::same_as auto&... 
childMatchers) { + return RootOperation<::PathSearch>(AllOf( + children(childMatchers...), + AD_PROPERTY(PathSearch, getConfig, PathSearchConfigMatcher(config)), + AD_PROPERTY(PathSearch, isSourceBound, Eq(sourceBound)), + AD_PROPERTY(PathSearch, isTargetBound, Eq(targetBound)))); + }; + +inline auto ValuesClause = [](string cacheKey) { + return RootOperation<::Values>( + AllOf(AD_PROPERTY(Values, getCacheKey, cacheKey))); +}; + // Match a SpatialJoin operation, set arguments to ignore to -1 inline auto SpatialJoin = [](size_t maxDist, size_t maxResults,