From c562c81b7c210a8b0dd42652229221ed41f8366f Mon Sep 17 00:00:00 2001 From: Hannah Bast Date: Fri, 18 Oct 2024 23:40:50 +0200 Subject: [PATCH 1/3] Add function `geof:isWktPoint` The function returns true if and only if the argument is a WKT point. This can be checked efficiently by checking the datatype bits of the ID. Note that this function is not part of the GeoSPARQL standard. We add it because we need it for https://github.com/ad-freiburg/qlever-petrimaps to fetch all WKT literals that are not points efficiently. --- .../sparqlExpressions/IsSomethingExpressions.cpp | 14 ++++++++++---- src/engine/sparqlExpressions/NaryExpression.h | 1 + .../SparqlExpressionValueGetters.h | 12 ++++++++++++ src/parser/sparqlParser/SparqlQleverVisitor.cpp | 3 +++ test/SparqlAntlrParserTest.cpp | 2 ++ test/SparqlExpressionTest.cpp | 3 +++ 6 files changed, 31 insertions(+), 4 deletions(-) diff --git a/src/engine/sparqlExpressions/IsSomethingExpressions.cpp b/src/engine/sparqlExpressions/IsSomethingExpressions.cpp index 0d1780b756..cdfa18364d 100644 --- a/src/engine/sparqlExpressions/IsSomethingExpressions.cpp +++ b/src/engine/sparqlExpressions/IsSomethingExpressions.cpp @@ -24,14 +24,17 @@ namespace detail { // `...Expression` (`std::move` the arguments into the constructor). The // function should be declared in `NaryExpression.h`. -// Expressions for `isIRI`, `isBlank`, `isLiteral`, and `isNumeric`. Note that -// the value getters already return the correct `Id`, hence `std::identity`. +// Expressions for the builtin functions `isIRI`, `isBlank`, `isLiteral`, +// `isNumeric`, and the custom function `isWktPoint`. Note that the value +// getters already return the correct `Id`, hence `std::identity`. using isIriExpression = NARY<1, FV>; using isBlankExpression = NARY<1, FV>; using isLiteralExpression = NARY<1, FV>; using isNumericExpression = NARY<1, FV>; -// The expression for `bound` is slightly different because -// `IsValidValueGetter` returns a `bool` and not an `Id`.
+using isWktPointExpression = NARY<1, FV>; + +// The expression for `bound` is slightly different as `IsValidValueGetter` +// returns a `bool` and not an `Id`. inline auto boolToId = [](bool b) { return Id::makeFromBool(b); }; using boundExpression = NARY<1, FV>; @@ -49,6 +52,9 @@ SparqlExpression::Ptr makeIsLiteralExpression(SparqlExpression::Ptr arg) { SparqlExpression::Ptr makeIsNumericExpression(SparqlExpression::Ptr arg) { return std::make_unique(std::move(arg)); } +SparqlExpression::Ptr makeIsWktPointExpression(SparqlExpression::Ptr arg) { + return std::make_unique(std::move(arg)); +} SparqlExpression::Ptr makeBoundExpression(SparqlExpression::Ptr arg) { return std::make_unique(std::move(arg)); } diff --git a/src/engine/sparqlExpressions/NaryExpression.h b/src/engine/sparqlExpressions/NaryExpression.h index 5237a2283a..c82e1b22ff 100644 --- a/src/engine/sparqlExpressions/NaryExpression.h +++ b/src/engine/sparqlExpressions/NaryExpression.h @@ -119,6 +119,7 @@ SparqlExpression::Ptr makeIsIriExpression(SparqlExpression::Ptr child); SparqlExpression::Ptr makeIsBlankExpression(SparqlExpression::Ptr child); SparqlExpression::Ptr makeIsLiteralExpression(SparqlExpression::Ptr child); SparqlExpression::Ptr makeIsNumericExpression(SparqlExpression::Ptr child); +SparqlExpression::Ptr makeIsWktPointExpression(SparqlExpression::Ptr child); SparqlExpression::Ptr makeBoundExpression(SparqlExpression::Ptr child); // For a `function` that takes `std::vector` (size only diff --git a/src/engine/sparqlExpressions/SparqlExpressionValueGetters.h b/src/engine/sparqlExpressions/SparqlExpressionValueGetters.h index 6e7cd310ec..194c6c4b43 100644 --- a/src/engine/sparqlExpressions/SparqlExpressionValueGetters.h +++ b/src/engine/sparqlExpressions/SparqlExpressionValueGetters.h @@ -190,6 +190,18 @@ struct IsNumericValueGetter : Mixin { } }; +// Value getter for `isWktPoint`. 
+struct IsWktPointValueGetter : Mixin { + using Mixin::operator(); + Id operator()(ValueId id, const EvaluationContext*) const { + return Id::makeFromBool(id.getDatatype() == Datatype::GeoPoint); + } + + Id operator()(const LiteralOrIri&, const EvaluationContext*) const { + return Id::makeFromBool(false); + } +}; + /// This class can be used as the `ValueGetter` argument of Expression /// templates. It produces a `std::optional`. struct DateValueGetter : Mixin { diff --git a/src/parser/sparqlParser/SparqlQleverVisitor.cpp b/src/parser/sparqlParser/SparqlQleverVisitor.cpp index 777f29714d..6cfb5831de 100644 --- a/src/parser/sparqlParser/SparqlQleverVisitor.cpp +++ b/src/parser/sparqlParser/SparqlQleverVisitor.cpp @@ -112,6 +112,9 @@ ExpressionPtr Visitor::processIriFunctionCall( } else if (functionName == "latitude") { checkNumArgs(1); return sparqlExpression::makeLatitudeExpression(std::move(argList[0])); + } else if (functionName == "iswktpoint") { + checkNumArgs(1); + return sparqlExpression::makeIsWktPointExpression(std::move(argList[0])); } } else if (checkPrefix(MATH_PREFIX)) { if (functionName == "log") { diff --git a/test/SparqlAntlrParserTest.cpp b/test/SparqlAntlrParserTest.cpp index 0ee1705f46..598809e1ba 100644 --- a/test/SparqlAntlrParserTest.cpp +++ b/test/SparqlAntlrParserTest.cpp @@ -1655,6 +1655,8 @@ TEST(SparqlParser, FunctionCall) { matchUnary(&makeLatitudeExpression)); expectFunctionCall(absl::StrCat(geof, "longitude>(?x)"), matchUnary(&makeLongitudeExpression)); + expectFunctionCall(absl::StrCat(geof, "iswktpoint>(?x)"), + matchUnary(&makeIsWktPointExpression)); expectFunctionCall( absl::StrCat(geof, "distance>(?a, ?b)"), matchNary(&makeDistExpression, Variable{"?a"}, Variable{"?b"})); diff --git a/test/SparqlExpressionTest.cpp b/test/SparqlExpressionTest.cpp index ab5b3add68..da9083e23b 100644 --- a/test/SparqlExpressionTest.cpp +++ b/test/SparqlExpressionTest.cpp @@ -1117,6 +1117,7 @@ TEST(SparqlExpression, testToNumericExpression) { 
TEST(SparqlExpression, geoSparqlExpressions) { auto checkLat = testUnaryExpression<&makeLatitudeExpression>; auto checkLong = testUnaryExpression<&makeLongitudeExpression>; + auto checkIsWktPoint = testUnaryExpression<&makeIsWktPointExpression>; auto checkDist = std::bind_front(testNaryExpression, &makeDistExpression); auto p = GeoPoint(26.8, 24.3); @@ -1136,9 +1137,11 @@ TEST(SparqlExpression, geoSparqlExpressions) { checkLat(v, vLat); checkLong(v, vLng); + checkIsWktPoint(v, B(true)); checkDist(D(0.0), v, v); checkLat(idOrLitOrStringVec({"NotAPoint", I(12)}), Ids{U, U}); checkLong(idOrLitOrStringVec({D(4.2), "NotAPoint"}), Ids{U, U}); + checkIsWktPoint(IdOrLiteralOrIri{lit("NotAPoint")}, B(false)); checkDist(U, v, IdOrLiteralOrIri{I(12)}); checkDist(U, IdOrLiteralOrIri{I(12)}, v); checkDist(U, v, IdOrLiteralOrIri{lit("NotAPoint")}); From 26500a938f90d6f0d81c9de84f66ccaeb0e45ca0 Mon Sep 17 00:00:00 2001 From: Hannah Bast Date: Sat, 19 Oct 2024 03:22:10 +0200 Subject: [PATCH 2/3] Rename function from `geof:iswktpoint` to `geof:isPointWKT` Reason: the predicate is called `geo:asWKT`, so the new name looks more consistent with that. 
--- src/parser/sparqlParser/SparqlQleverVisitor.cpp | 2 +- test/SparqlAntlrParserTest.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/parser/sparqlParser/SparqlQleverVisitor.cpp b/src/parser/sparqlParser/SparqlQleverVisitor.cpp index 6cfb5831de..099ee581ff 100644 --- a/src/parser/sparqlParser/SparqlQleverVisitor.cpp +++ b/src/parser/sparqlParser/SparqlQleverVisitor.cpp @@ -112,7 +112,7 @@ ExpressionPtr Visitor::processIriFunctionCall( } else if (functionName == "latitude") { checkNumArgs(1); return sparqlExpression::makeLatitudeExpression(std::move(argList[0])); - } else if (functionName == "iswktpoint") { + } else if (functionName == "isPointWKT") { checkNumArgs(1); return sparqlExpression::makeIsWktPointExpression(std::move(argList[0])); } diff --git a/test/SparqlAntlrParserTest.cpp b/test/SparqlAntlrParserTest.cpp index 598809e1ba..e2a0a0ed6b 100644 --- a/test/SparqlAntlrParserTest.cpp +++ b/test/SparqlAntlrParserTest.cpp @@ -1655,7 +1655,7 @@ TEST(SparqlParser, FunctionCall) { matchUnary(&makeLatitudeExpression)); expectFunctionCall(absl::StrCat(geof, "longitude>(?x)"), matchUnary(&makeLongitudeExpression)); - expectFunctionCall(absl::StrCat(geof, "iswktpoint>(?x)"), + expectFunctionCall(absl::StrCat(geof, "isPointWKT>(?x)"), matchUnary(&makeIsWktPointExpression)); expectFunctionCall( absl::StrCat(geof, "distance>(?a, ?b)"), From 7e741be16f560e0074f9f1b3a4dedb2968c1cb9b Mon Sep 17 00:00:00 2001 From: Hannah Bast Date: Sat, 26 Oct 2024 13:54:29 +0200 Subject: [PATCH 3/3] Make it a QLever-internal function + rename constants 1. It was not a good idea to make this a `geof:` function for two reasons: First, it's not a standard `geof:` function. Second, and worse, the function does not actually check whether the argument is a WKT point, it only checks whether the argument is a WKT point that is stored in the `Id`. 
There are also WKT points that are not stored in the `Id` for various reasons (notably, when the coordinates are out of range). It is therefore now a QLever-internal function with the name `ql:isGeoPoint`, following the name of the `Datatype`. 2. This is our first QLever-internal function, so far we only had QLever-internal predicates (like `ql:has-predicate` or `ql:langtag`) and entities (like `ql:@en`, which technically is not a valid IRI, but that is not relevant here). This required new constants in `Constants.h`, which prompted me to give better names to the other constants there related to QLever-internal stuff. This, in turn, required renaming in quite a few files. It looks like a lot, but almost all of it is really just straightforward renaming. --- src/engine/QueryPlanner.cpp | 2 +- .../sparqlExpressions/CountStarExpression.cpp | 3 +- .../IsSomethingExpressions.cpp | 10 ++-- src/engine/sparqlExpressions/NaryExpression.h | 4 +- .../SparqlExpressionValueGetters.h | 58 ++++++++---------- src/global/Constants.h | 60 ++++++++++--------- src/global/SpecialIds.h | 2 +- src/index/ConstantsIndexBuilding.h | 2 +- src/index/IndexImpl.cpp | 8 +-- src/index/Permutation.cpp | 3 +- src/index/Vocabulary.cpp | 2 +- src/index/VocabularyMerger.h | 3 +- src/parser/ParsedQuery.cpp | 13 ++-- src/parser/SparqlParser.cpp | 2 +- .../sparqlParser/SparqlQleverVisitor.cpp | 27 +++++++-- src/util/Conversions.cpp | 8 +-- test/AggregateExpressionTest.cpp | 2 +- test/ConstantsTest.cpp | 8 +-- test/QueryPlannerTest.cpp | 2 +- test/SparqlAntlrParserTest.cpp | 16 ++--- test/SparqlAntlrParserTestHelpers.h | 8 +-- test/SparqlExpressionTest.cpp | 6 +- 22 files changed, 134 insertions(+), 115 deletions(-) diff --git a/src/engine/QueryPlanner.cpp b/src/engine/QueryPlanner.cpp index b63958e198..c18527c186 100644 --- a/src/engine/QueryPlanner.cpp +++ b/src/engine/QueryPlanner.cpp @@ -937,7 +937,7 @@ ParsedQuery::GraphPattern QueryPlanner::uniteGraphPatterns( //
_____________________________________________________________________________ Variable QueryPlanner::generateUniqueVarName() { - return Variable{absl::StrCat(INTERNAL_VARIABLE_QUERY_PLANNER_PREFIX, + return Variable{absl::StrCat(QLEVER_INTERNAL_VARIABLE_QUERY_PLANNER_PREFIX, _internalVarCount++)}; } diff --git a/src/engine/sparqlExpressions/CountStarExpression.cpp b/src/engine/sparqlExpressions/CountStarExpression.cpp index 29f55050c1..a56bd1b044 100644 --- a/src/engine/sparqlExpressions/CountStarExpression.cpp +++ b/src/engine/sparqlExpressions/CountStarExpression.cpp @@ -42,7 +42,8 @@ class CountStarExpression : public SparqlExpression { auto varToColNoInternalVariables = ctx->_variableToColumnMap | std::views::filter([](const auto& varAndIdx) { - return !varAndIdx.first.name().starts_with(INTERNAL_VARIABLE_PREFIX); + return !varAndIdx.first.name().starts_with( + QLEVER_INTERNAL_VARIABLE_PREFIX); }); table.setNumColumns(std::ranges::distance(varToColNoInternalVariables)); table.resize(ctx->size()); diff --git a/src/engine/sparqlExpressions/IsSomethingExpressions.cpp b/src/engine/sparqlExpressions/IsSomethingExpressions.cpp index cdfa18364d..dc196fe2b2 100644 --- a/src/engine/sparqlExpressions/IsSomethingExpressions.cpp +++ b/src/engine/sparqlExpressions/IsSomethingExpressions.cpp @@ -28,10 +28,12 @@ namespace detail { // `isNumeric`, and the custom function `isWktPoint`. Note that the value // getters already return the correct `Id`, hence `std::identity`. using isIriExpression = NARY<1, FV>; -using isBlankExpression = NARY<1, FV>; using isLiteralExpression = NARY<1, FV>; using isNumericExpression = NARY<1, FV>; -using isWktPointExpression = NARY<1, FV>; +using isBlankExpression = + NARY<1, FV>>; +using isGeoPointExpression = + NARY<1, FV>>; // The expression for `bound` is slightly different as `IsValidValueGetter` // returns a `bool` and not an `Id`. 
@@ -52,8 +54,8 @@ SparqlExpression::Ptr makeIsLiteralExpression(SparqlExpression::Ptr arg) { SparqlExpression::Ptr makeIsNumericExpression(SparqlExpression::Ptr arg) { return std::make_unique(std::move(arg)); } -SparqlExpression::Ptr makeIsWktPointExpression(SparqlExpression::Ptr arg) { - return std::make_unique(std::move(arg)); +SparqlExpression::Ptr makeIsGeoPointExpression(SparqlExpression::Ptr arg) { + return std::make_unique(std::move(arg)); } SparqlExpression::Ptr makeBoundExpression(SparqlExpression::Ptr arg) { return std::make_unique(std::move(arg)); diff --git a/src/engine/sparqlExpressions/NaryExpression.h b/src/engine/sparqlExpressions/NaryExpression.h index c82e1b22ff..8fb8d95043 100644 --- a/src/engine/sparqlExpressions/NaryExpression.h +++ b/src/engine/sparqlExpressions/NaryExpression.h @@ -116,10 +116,10 @@ std::optional getVariableFromLangExpression( SparqlExpression::Ptr makeEncodeForUriExpression(SparqlExpression::Ptr child); SparqlExpression::Ptr makeIsIriExpression(SparqlExpression::Ptr child); -SparqlExpression::Ptr makeIsBlankExpression(SparqlExpression::Ptr child); SparqlExpression::Ptr makeIsLiteralExpression(SparqlExpression::Ptr child); SparqlExpression::Ptr makeIsNumericExpression(SparqlExpression::Ptr child); -SparqlExpression::Ptr makeIsWktPointExpression(SparqlExpression::Ptr child); +SparqlExpression::Ptr makeIsBlankExpression(SparqlExpression::Ptr child); +SparqlExpression::Ptr makeIsGeoPointExpression(SparqlExpression::Ptr child); SparqlExpression::Ptr makeBoundExpression(SparqlExpression::Ptr child); // For a `function` that takes `std::vector` (size only diff --git a/src/engine/sparqlExpressions/SparqlExpressionValueGetters.h b/src/engine/sparqlExpressions/SparqlExpressionValueGetters.h index 194c6c4b43..88dbc2d825 100644 --- a/src/engine/sparqlExpressions/SparqlExpressionValueGetters.h +++ b/src/engine/sparqlExpressions/SparqlExpressionValueGetters.h @@ -141,11 +141,13 @@ struct StringValueGetter : Mixin { } }; -// Value getter 
for `isBlank`. -struct IsBlankNodeValueGetter : Mixin { - using Mixin::operator(); - Id operator()(ValueId id, const EvaluationContext*) const { - return Id::makeFromBool(id.getDatatype() == Datatype::BlankNodeIndex); +// Boolean value getter that checks whether the given `Id` is a `ValueId` of the +// given `datatype`. +template +struct IsValueIdValueGetter : Mixin> { + using Mixin::operator(); + Id operator()(Id id, const EvaluationContext*) const { + return Id::makeFromBool(id.getDatatype() == datatype); } Id operator()(const LiteralOrIri&, const EvaluationContext*) const { @@ -153,7 +155,21 @@ struct IsBlankNodeValueGetter : Mixin { } }; -// Value getters for `isIRI`, `isBlank`, and `isLiteral`. +// Boolean value getter for `isNumeric`. Regarding which datatypes count as +// numeric, see https://www.w3.org/TR/sparql11-query/#operandDataTypes . +struct IsNumericValueGetter : Mixin { + using Mixin::operator(); + Id operator()(ValueId id, const EvaluationContext*) const { + Datatype datatype = id.getDatatype(); + return Id::makeFromBool(datatype == Datatype::Double || + datatype == Datatype::Int); + } + Id operator()(const LiteralOrIri&, const EvaluationContext*) const { + return Id::makeFromBool(false); + } +}; + +// Boolean value getters for `isIRI`, `isBlank`, and `isLiteral`. template struct IsSomethingValueGetter : Mixin; -// Value getter for `isNumeric`. Regarding which datatypes count as numeric, -// see https://www.w3.org/TR/sparql11-query/#operandDataTypes . -struct IsNumericValueGetter : Mixin { - using Mixin::operator(); - Id operator()(ValueId id, const EvaluationContext*) const { - Datatype datatype = id.getDatatype(); - return Id::makeFromBool(datatype == Datatype::Double || - datatype == Datatype::Int); - } - Id operator()(const LiteralOrIri&, const EvaluationContext*) const { - return Id::makeFromBool(false); - } -}; - -// Value getter for `isWktPoint`. 
-struct IsWktPointValueGetter : Mixin { - using Mixin::operator(); - Id operator()(ValueId id, const EvaluationContext*) const { - return Id::makeFromBool(id.getDatatype() == Datatype::GeoPoint); - } - - Id operator()(const LiteralOrIri&, const EvaluationContext*) const { - return Id::makeFromBool(false); - } -}; - -/// This class can be used as the `ValueGetter` argument of Expression -/// templates. It produces a `std::optional`. +// This class can be used as the `ValueGetter` argument of Expression +// templates. It produces a `std::optional`. struct DateValueGetter : Mixin { using Mixin::operator(); using Opt = std::optional; diff --git a/src/global/Constants.h b/src/global/Constants.h index 0e94c1ed99..f5632acc8b 100644 --- a/src/global/Constants.h +++ b/src/global/Constants.h @@ -35,43 +35,44 @@ constexpr inline size_t TEXT_PREDICATE_CARDINALITY_ESTIMATE = 1'000'000'000; constexpr inline size_t GALLOP_THRESHOLD = 1000; -constexpr inline char INTERNAL_PREDICATE_PREFIX_NAME[] = "ql"; - -constexpr inline char INTERNAL_PREDICATE_PREFIX[] = +constexpr inline char QLEVER_INTERNAL_PREFIX_NAME[] = "ql"; +constexpr inline char QLEVER_INTERNAL_PREFIX_URL[] = "http://qlever.cs.uni-freiburg.de/builtin-functions/"; -// Return a IRI of the form -// `` +// Make a QLever-internal IRI from `QLEVER_INTERNAL_PREFIX_URL` by appending the +// concatenation of the given `suffixes` and enclosing the result in angle +// brackets (const and non-const version). template < ad_utility::detail::constexpr_str_cat_impl::ConstexprString... suffixes> -constexpr std::string_view makeInternalIriConst() { - return ad_utility::constexprStrCat<"<", INTERNAL_PREDICATE_PREFIX, +constexpr std::string_view makeQleverInternalIriConst() { + return ad_utility::constexprStrCat<"<", QLEVER_INTERNAL_PREFIX_URL, suffixes..., ">">(); } - -inline std::string makeInternalIri(const auto&...
suffixes) { - return absl::StrCat("<", std::string_view{INTERNAL_PREDICATE_PREFIX}, +inline std::string makeQleverInternalIri(const auto&... suffixes) { + return absl::StrCat("<", std::string_view{QLEVER_INTERNAL_PREFIX_URL}, suffixes..., ">"); } -constexpr inline std::string_view INTERNAL_ENTITIES_URI_PREFIX = - ad_utility::constexprStrCat<"<", INTERNAL_PREDICATE_PREFIX>(); -constexpr inline std::string_view INTERNAL_PREDICATE_PREFIX_IRI = - makeInternalIriConst<"">(); + +constexpr inline std::string_view QLEVER_INTERNAL_PREFIX_IRI = + makeQleverInternalIriConst<"">(); +constexpr inline std::string_view + QLEVER_INTERNAL_PREFIX_IRI_WITHOUT_CLOSING_BRACKET = + ad_utility::constexprStrCat<"<", QLEVER_INTERNAL_PREFIX_URL>(); constexpr inline std::string_view CONTAINS_ENTITY_PREDICATE = - makeInternalIriConst<"contains-entity">(); + makeQleverInternalIriConst<"contains-entity">(); constexpr inline std::string_view CONTAINS_WORD_PREDICATE = - makeInternalIriConst<"contains-word">(); + makeQleverInternalIriConst<"contains-word">(); -constexpr inline std::string_view INTERNAL_TEXT_MATCH_PREDICATE = - makeInternalIriConst<"text">(); +constexpr inline std::string_view QLEVER_INTERNAL_TEXT_MATCH_PREDICATE = + makeQleverInternalIriConst<"text">(); constexpr inline std::string_view HAS_PREDICATE_PREDICATE = - makeInternalIriConst<"has-predicate">(); + makeQleverInternalIriConst<"has-predicate">(); constexpr inline std::string_view HAS_PATTERN_PREDICATE = - makeInternalIriConst<"has-pattern">(); + makeQleverInternalIriConst<"has-pattern">(); constexpr inline std::string_view DEFAULT_GRAPH_IRI = - makeInternalIriConst<"default-graph">(); -constexpr inline std::string_view INTERNAL_GRAPH_IRI = - makeInternalIriConst<"internal-graph">(); + makeQleverInternalIriConst<"default-graph">(); +constexpr inline std::string_view QLEVER_INTERNAL_GRAPH_IRI = + makeQleverInternalIriConst<"internal-graph">(); constexpr inline std::pair GEOF_PREFIX = { "geof:", 
"http://www.opengis.net/def/function/geosparql/"}; @@ -79,22 +80,25 @@ constexpr inline std::pair MATH_PREFIX = { "math:", "http://www.w3.org/2005/xpath-functions/math#"}; constexpr inline std::pair XSD_PREFIX = { "xsd", "http://www.w3.org/2001/XMLSchema#"}; +constexpr inline std::pair QL_PREFIX = { + QLEVER_INTERNAL_PREFIX_NAME, QLEVER_INTERNAL_PREFIX_URL}; -constexpr inline std::string_view INTERNAL_VARIABLE_PREFIX = +constexpr inline std::string_view QLEVER_INTERNAL_VARIABLE_PREFIX = "?_QLever_internal_variable_"; -constexpr inline std::string_view INTERNAL_BLANKNODE_VARIABLE_PREFIX = +constexpr inline std::string_view QLEVER_INTERNAL_BLANKNODE_VARIABLE_PREFIX = "?_QLever_internal_variable_bn_"; -constexpr inline std::string_view INTERNAL_VARIABLE_QUERY_PLANNER_PREFIX = - "?_QLever_internal_variable_qp_"; +constexpr inline std::string_view + QLEVER_INTERNAL_VARIABLE_QUERY_PLANNER_PREFIX = + "?_QLever_internal_variable_qp_"; constexpr inline std::string_view SCORE_VARIABLE_PREFIX = "?ql_score_"; constexpr inline std::string_view MATCHINGWORD_VARIABLE_PREFIX = "?ql_matchingword_"; constexpr inline std::string_view LANGUAGE_PREDICATE = - makeInternalIriConst<"langtag">(); + makeQleverInternalIriConst<"langtag">(); // this predicate is one of the supported identifiers for the SpatialJoin class. 
// It joins the two objects, if their distance is smaller or equal to the diff --git a/src/global/SpecialIds.h b/src/global/SpecialIds.h index 658998bf7b..b844e11724 100644 --- a/src/global/SpecialIds.h +++ b/src/global/SpecialIds.h @@ -27,7 +27,7 @@ inline const ad_utility::HashMap& specialIds() { {S{HAS_PREDICATE_PREDICATE}, Id::fromBits(1)}, {S{HAS_PATTERN_PREDICATE}, Id::fromBits(2)}, {S{DEFAULT_GRAPH_IRI}, Id::fromBits(3)}, - {S{INTERNAL_GRAPH_IRI}, Id::fromBits(4)}}; + {S{QLEVER_INTERNAL_GRAPH_IRI}, Id::fromBits(4)}}; // Perform the following checks: All the special IDs are unique, all of them // have the `Undefined` datatype, but none of them is equal to the "actual" diff --git a/src/index/ConstantsIndexBuilding.h b/src/index/ConstantsIndexBuilding.h index 5f7643484d..71d4897878 100644 --- a/src/index/ConstantsIndexBuilding.h +++ b/src/index/ConstantsIndexBuilding.h @@ -63,7 +63,7 @@ constexpr inline std::string_view TMP_BASENAME_COMPRESSION = ".tmp.for-prefix-compression"; // _________________________________________________________________ -constexpr inline std::string_view INTERNAL_INDEX_INFIX = ".internal"; +constexpr inline std::string_view QLEVER_INTERNAL_INDEX_INFIX = ".internal"; // _________________________________________________________________ // The degree of parallelism that is used for the index building step, where the diff --git a/src/index/IndexImpl.cpp b/src/index/IndexImpl.cpp index cd12e58b6b..7f5e479f59 100644 --- a/src/index/IndexImpl.cpp +++ b/src/index/IndexImpl.cpp @@ -268,7 +268,7 @@ std::pair IndexImpl::createInternalPSOandPOS( auto&& internalTriplesPsoSorter) { auto onDiskBaseBackup = onDiskBase_; auto configurationJsonBackup = configurationJson_; - onDiskBase_.append(INTERNAL_INDEX_INFIX); + onDiskBase_.append(QLEVER_INTERNAL_INDEX_INFIX); auto internalTriplesUnique = ad_utility::uniqueBlockView( internalTriplesPsoSorter.template getSortedBlocks<0>()); createPSOAndPOSImpl(NumColumnsIndexBuilding, 
std::move(internalTriplesUnique), @@ -560,7 +560,7 @@ IndexBuilderDataAsStxxlVector IndexImpl::passFileForVocabulary( idOfHasPatternDuringIndexBuilding_ = mergeRes.specialIdMapping().at(HAS_PATTERN_PREDICATE); idOfInternalGraphDuringIndexBuilding_ = - mergeRes.specialIdMapping().at(INTERNAL_GRAPH_IRI); + mergeRes.specialIdMapping().at(QLEVER_INTERNAL_GRAPH_IRI); LOG(INFO) << "Number of words in external vocabulary: " << res.vocabularyMetaData_.numWordsTotal() - sizeInternalVocabulary << std::endl; @@ -858,7 +858,7 @@ void IndexImpl::createFromOnDiskIndex(const string& onDiskBase) { << vocab_.size() << std::endl; auto range1 = - vocab_.prefixRanges(absl::StrCat("<", INTERNAL_PREDICATE_PREFIX)); + vocab_.prefixRanges(QLEVER_INTERNAL_PREFIX_IRI_WITHOUT_CLOSING_BRACKET); auto range2 = vocab_.prefixRanges("@"); auto isInternalId = [range1, range2](Id id) { // TODO What about internal vocab stuff for update queries? this @@ -1464,7 +1464,7 @@ size_t IndexImpl::getCardinality(const TripleComponent& comp, // or objects anyway. // TODO Find out what the effect of this special case is for the // query planning. 
- if (comp == INTERNAL_TEXT_MATCH_PREDICATE) { + if (comp == QLEVER_INTERNAL_TEXT_MATCH_PREDICATE) { return TEXT_PREDICATE_CARDINALITY_ESTIMATE; } if (std::optional relId = comp.toValueId(getVocab()); relId.has_value()) { diff --git a/src/index/Permutation.cpp b/src/index/Permutation.cpp index 0b173e0bdb..16f5113d68 100644 --- a/src/index/Permutation.cpp +++ b/src/index/Permutation.cpp @@ -26,7 +26,8 @@ void Permutation::loadFromDisk(const std::string& onDiskBase, internalPermutation_ = std::make_unique(permutation_, allocator_); internalPermutation_->loadFromDisk( - absl::StrCat(onDiskBase, INTERNAL_INDEX_INFIX), isInternalId_, false); + absl::StrCat(onDiskBase, QLEVER_INTERNAL_INDEX_INFIX), isInternalId_, + false); } if constexpr (MetaData::isMmapBased_) { meta_.setup(onDiskBase + ".index" + fileSuffix_ + MMAP_FILE_SUFFIX, diff --git a/src/index/Vocabulary.cpp b/src/index/Vocabulary.cpp index 9e64e0a23e..9afc172f19 100644 --- a/src/index/Vocabulary.cpp +++ b/src/index/Vocabulary.cpp @@ -111,7 +111,7 @@ bool Vocabulary::shouldEntityBeExternalized( // Never externalize the internal IRIs as they are sometimes added before or // after the externalization happens and we thus get inconsistent behavior // etc. for `ql:langtag`. 
- if (word.starts_with(INTERNAL_ENTITIES_URI_PREFIX)) { + if (word.starts_with(QLEVER_INTERNAL_PREFIX_IRI_WITHOUT_CLOSING_BRACKET)) { return false; } // Never externalize the special IRIs starting with `@` (for example, diff --git a/src/index/VocabularyMerger.h b/src/index/VocabularyMerger.h index 3c171d2c65..8b9322796c 100644 --- a/src/index/VocabularyMerger.h +++ b/src/index/VocabularyMerger.h @@ -125,7 +125,8 @@ struct VocabularyMetaData { size_t numBlankNodesTotal_ = 0; IdRangeForPrefix langTaggedPredicates_{ std::string{ad_utility::languageTaggedPredicatePrefix}}; - IdRangeForPrefix internalEntities_{std::string{INTERNAL_ENTITIES_URI_PREFIX}}; + IdRangeForPrefix internalEntities_{ + std::string{QLEVER_INTERNAL_PREFIX_IRI_WITHOUT_CLOSING_BRACKET}}; ad_utility::HashMap specialIdMapping_; const ad_utility::HashMap* globalSpecialIds_ = diff --git a/src/parser/ParsedQuery.cpp b/src/parser/ParsedQuery.cpp index c07f25768b..c9496608e7 100644 --- a/src/parser/ParsedQuery.cpp +++ b/src/parser/ParsedQuery.cpp @@ -253,7 +253,7 @@ void ParsedQuery::registerVariablesVisibleInQueryBody( // _____________________________________________________________________________ void ParsedQuery::registerVariableVisibleInQueryBody(const Variable& variable) { auto addVariable = [&variable](auto& clause) { - if (!variable.name().starts_with(INTERNAL_VARIABLE_PREFIX)) { + if (!variable.name().starts_with(QLEVER_INTERNAL_VARIABLE_PREFIX)) { clause.addVisibleVariable(variable); } }; @@ -285,7 +285,8 @@ void ParsedQuery::GraphPattern::addLanguageFilter(const Variable& variable, if (triple.o_ == variable && (triple.p_._operation == PropertyPath::Operation::IRI && !isVariable(triple.p_)) && - !triple.p_._iri.starts_with(INTERNAL_ENTITIES_URI_PREFIX)) { + !triple.p_._iri.starts_with( + QLEVER_INTERNAL_PREFIX_IRI_WITHOUT_CLOSING_BRACKET)) { matchingTriples.push_back(&triple); } } @@ -492,14 +493,14 @@ void ParsedQuery::addOrderByClause(OrderClause orderClause, bool isGroupBy, // 
________________________________________________________________ Variable ParsedQuery::getNewInternalVariable() { - auto variable = - Variable{absl::StrCat(INTERNAL_VARIABLE_PREFIX, numInternalVariables_)}; + auto variable = Variable{ + absl::StrCat(QLEVER_INTERNAL_VARIABLE_PREFIX, numInternalVariables_)}; numInternalVariables_++; return variable; } Variable ParsedQuery::blankNodeToInternalVariable(std::string_view blankNode) { AD_CONTRACT_CHECK(blankNode.starts_with("_:")); - return Variable{ - absl::StrCat(INTERNAL_BLANKNODE_VARIABLE_PREFIX, blankNode.substr(2))}; + return Variable{absl::StrCat(QLEVER_INTERNAL_BLANKNODE_VARIABLE_PREFIX, + blankNode.substr(2))}; } diff --git a/src/parser/SparqlParser.cpp b/src/parser/SparqlParser.cpp index c043c69059..e75855fe91 100644 --- a/src/parser/SparqlParser.cpp +++ b/src/parser/SparqlParser.cpp @@ -14,7 +14,7 @@ ParsedQuery SparqlParser::parseQuery(std::string query) { using S = std::string; sparqlParserHelpers::ParserAndVisitor p{ std::move(query), - {{S{INTERNAL_PREDICATE_PREFIX_NAME}, S{INTERNAL_PREDICATE_PREFIX_IRI}}}}; + {{S{QLEVER_INTERNAL_PREFIX_NAME}, S{QLEVER_INTERNAL_PREFIX_IRI}}}}; // Note: `AntlrParser::query` is a method of `AntlrParser` (which is an alias // for `SparqlAutomaticParser`) that returns the `QueryContext*` for the whole // query. diff --git a/src/parser/sparqlParser/SparqlQleverVisitor.cpp b/src/parser/sparqlParser/SparqlQleverVisitor.cpp index f0f9e105e8..7e8ae8facf 100644 --- a/src/parser/sparqlParser/SparqlQleverVisitor.cpp +++ b/src/parser/sparqlParser/SparqlQleverVisitor.cpp @@ -87,6 +87,7 @@ ExpressionPtr Visitor::processIriFunctionCall( return false; } }; + // Helper lambda that checks the number of arguments and throws an error // if it's not right. The `functionName` and `prefixName` are used for the // error message. @@ -102,6 +103,7 @@ ExpressionPtr Visitor::processIriFunctionCall( numArgs == 1 ? " argument" : " arguments")); } }; + // Geo functions. 
if (checkPrefix(GEOF_PREFIX)) { if (functionName == "distance") { @@ -114,11 +116,11 @@ ExpressionPtr Visitor::processIriFunctionCall( } else if (functionName == "latitude") { checkNumArgs(1); return sparqlExpression::makeLatitudeExpression(std::move(argList[0])); - } else if (functionName == "isPointWKT") { - checkNumArgs(1); - return sparqlExpression::makeIsWktPointExpression(std::move(argList[0])); } - } else if (checkPrefix(MATH_PREFIX)) { + } + + // Math functions. + if (checkPrefix(MATH_PREFIX)) { if (functionName == "log") { checkNumArgs(1); return sparqlExpression::makeLogExpression(std::move(argList[0])); @@ -142,7 +144,10 @@ ExpressionPtr Visitor::processIriFunctionCall( return sparqlExpression::makePowExpression(std::move(argList[0]), std::move(argList[1])); } - } else if (checkPrefix(XSD_PREFIX)) { + } + + // XSD conversion functions. + if (checkPrefix(XSD_PREFIX)) { if (functionName == "integer" || functionName == "int") { checkNumArgs(1); return sparqlExpression::makeConvertToIntExpression( @@ -153,6 +158,18 @@ ExpressionPtr Visitor::processIriFunctionCall( std::move(argList[0])); } } + + // QLever-internal functions. + // + // NOTE: Predicates like `ql:has-predicate` etc. are handled elsewhere. + if (checkPrefix(QL_PREFIX)) { + if (functionName == "isGeoPoint") { + checkNumArgs(1); + return sparqlExpression::makeIsGeoPointExpression(std::move(argList[0])); + } + } + + // If none of the above matched, report unknown function. reportNotSupported(ctx, "Function \""s + iri.toStringRepresentation() + "\" is"); } diff --git a/src/util/Conversions.cpp b/src/util/Conversions.cpp index 1d56040aac..8c2fe16006 100644 --- a/src/util/Conversions.cpp +++ b/src/util/Conversions.cpp @@ -1,6 +1,6 @@ -// Copyright 2022, University of Freiburg, -// Chair of Algorithms and Data Structures. 
-// Author: Johannes Kalmbach +// Copyright 2022 - 2024, University of Freiburg +// Chair of Algorithms and Data Structures +// Author: Johannes Kalmbach #include "util/Conversions.h" @@ -23,7 +23,7 @@ namespace ad_utility { // _________________________________________________________ triple_component::Iri convertLangtagToEntityUri(const string& tag) { - return triple_component::Iri::fromIriref(makeInternalIri("@", tag)); + return triple_component::Iri::fromIriref(makeQleverInternalIri("@", tag)); } // _________________________________________________________ diff --git a/test/AggregateExpressionTest.cpp b/test/AggregateExpressionTest.cpp index af53e45edb..2977548812 100644 --- a/test/AggregateExpressionTest.cpp +++ b/test/AggregateExpressionTest.cpp @@ -196,7 +196,7 @@ TEST(AggregateExpression, CountStar) { // This variable is internal, so it doesn't count towards the `COUNT(DISTINCT // *)` and doesn't change the result. t.varToColMap[Variable{ - absl::StrCat(INTERNAL_VARIABLE_PREFIX, "someInternalVar")}] = { + absl::StrCat(QLEVER_INTERNAL_VARIABLE_PREFIX, "someInternalVar")}] = { 0, ColumnIndexAndTypeInfo::UndefStatus::AlwaysDefined}; t.qec->getQueryTreeCache().clearAll(); EXPECT_THAT(m, matcher(totalSize)); diff --git a/test/ConstantsTest.cpp b/test/ConstantsTest.cpp index 4c868cdd03..9e19c5ac20 100644 --- a/test/ConstantsTest.cpp +++ b/test/ConstantsTest.cpp @@ -26,9 +26,9 @@ TEST(Constants, testDefaultQueryTimeoutIsStriclyPositive) { EXPECT_NO_THROW(RuntimeParameters().set<"default-query-timeout">(1s)); } -TEST(Constants, makeInternalIri) { - EXPECT_EQ(makeInternalIri("hi", "-bye"), - (makeInternalIriConst<"hi", "-bye">())); - EXPECT_EQ(makeInternalIri("hi", "-bye"), +TEST(Constants, makeQleverInternalIri) { + EXPECT_EQ(makeQleverInternalIri("hi", "-bye"), + (makeQleverInternalIriConst<"hi", "-bye">())); + EXPECT_EQ(makeQleverInternalIri("hi", "-bye"), ""); } diff --git a/test/QueryPlannerTest.cpp b/test/QueryPlannerTest.cpp index 5db2454f1a..e88b46c8da 100644 
--- a/test/QueryPlannerTest.cpp +++ b/test/QueryPlannerTest.cpp @@ -710,7 +710,7 @@ namespace { // A helper function to recreate the internal variables added by the query // planner for transitive paths. std::string internalVar(int i) { - return absl::StrCat(INTERNAL_VARIABLE_QUERY_PLANNER_PREFIX, i); + return absl::StrCat(QLEVER_INTERNAL_VARIABLE_QUERY_PLANNER_PREFIX, i); } } // namespace diff --git a/test/SparqlAntlrParserTest.cpp b/test/SparqlAntlrParserTest.cpp index a0398bd190..48d2d6c0f8 100644 --- a/test/SparqlAntlrParserTest.cpp +++ b/test/SparqlAntlrParserTest.cpp @@ -43,8 +43,8 @@ auto iri = ad_utility::testing::iri; auto lit = ad_utility::testing::tripleComponentLiteral; const ad_utility::HashMap defaultPrefixMap{ - {std::string{INTERNAL_PREDICATE_PREFIX_NAME}, - std::string{INTERNAL_PREDICATE_PREFIX_IRI}}}; + {std::string{QLEVER_INTERNAL_PREFIX_NAME}, + std::string{QLEVER_INTERNAL_PREFIX_IRI}}}; template auto parse = @@ -817,9 +817,10 @@ TEST(SparqlParser, triplesSameSubjectPath) { ExpectCompleteParse<&Parser::triplesSameSubjectPath, true>{}; expectTriplesConstruct("_:1 ?baz", {{BlankNode(false, "1"), PathIri(""), Var{"?baz"}}}); - expectTriples("_:one ?baz", - {{Var{absl::StrCat(INTERNAL_BLANKNODE_VARIABLE_PREFIX, "one")}, - PathIri(""), Var{"?baz"}}}); + expectTriples( + "_:one ?baz", + {{Var{absl::StrCat(QLEVER_INTERNAL_BLANKNODE_VARIABLE_PREFIX, "one")}, + PathIri(""), Var{"?baz"}}}); expectTriples("10.0 true", {{Literal(10.0), PathIri(""), Literal(true)}}); expectTriples( @@ -1730,14 +1731,15 @@ TEST(SparqlParser, FunctionCall) { auto geof = absl::StrCat("<", GEOF_PREFIX.second); auto math = absl::StrCat("<", MATH_PREFIX.second); auto xsd = absl::StrCat("<", XSD_PREFIX.second); + auto ql = absl::StrCat("<", QL_PREFIX.second); // Correct function calls. Check that the parser picks the correct expression. 
expectFunctionCall(absl::StrCat(geof, "latitude>(?x)"), matchUnary(&makeLatitudeExpression)); expectFunctionCall(absl::StrCat(geof, "longitude>(?x)"), matchUnary(&makeLongitudeExpression)); - expectFunctionCall(absl::StrCat(geof, "isPointWKT>(?x)"), - matchUnary(&makeIsWktPointExpression)); + expectFunctionCall(absl::StrCat(ql, "isGeoPoint>(?x)"), + matchUnary(&makeIsGeoPointExpression)); expectFunctionCall( absl::StrCat(geof, "distance>(?a, ?b)"), matchNary(&makeDistExpression, Variable{"?a"}, Variable{"?b"})); diff --git a/test/SparqlAntlrParserTestHelpers.h b/test/SparqlAntlrParserTestHelpers.h index 788ece3fb8..43c0de28aa 100644 --- a/test/SparqlAntlrParserTestHelpers.h +++ b/test/SparqlAntlrParserTestHelpers.h @@ -287,10 +287,10 @@ inline auto BlankNode = [](bool generated, const std::string& label) { }; inline auto InternalVariable = [](const std::string& label) { - return MultiVariantWith( - testing::AllOf(AD_PROPERTY(::Variable, name, - testing::StartsWith(INTERNAL_VARIABLE_PREFIX)), - AD_PROPERTY(::Variable, name, testing::EndsWith(label)))); + return MultiVariantWith(testing::AllOf( + AD_PROPERTY(::Variable, name, + testing::StartsWith(QLEVER_INTERNAL_VARIABLE_PREFIX)), + AD_PROPERTY(::Variable, name, testing::EndsWith(label)))); }; // _____________________________________________________________________________ diff --git a/test/SparqlExpressionTest.cpp b/test/SparqlExpressionTest.cpp index da9083e23b..285f990bc8 100644 --- a/test/SparqlExpressionTest.cpp +++ b/test/SparqlExpressionTest.cpp @@ -1117,7 +1117,7 @@ TEST(SparqlExpression, testToNumericExpression) { TEST(SparqlExpression, geoSparqlExpressions) { auto checkLat = testUnaryExpression<&makeLatitudeExpression>; auto checkLong = testUnaryExpression<&makeLongitudeExpression>; - auto checkIsWktPoint = testUnaryExpression<&makeIsWktPointExpression>; + auto checkIsGeoPoint = testUnaryExpression<&makeIsGeoPointExpression>; auto checkDist = std::bind_front(testNaryExpression, &makeDistExpression); auto p 
= GeoPoint(26.8, 24.3); @@ -1137,11 +1137,11 @@ TEST(SparqlExpression, geoSparqlExpressions) { checkLat(v, vLat); checkLong(v, vLng); - checkIsWktPoint(v, B(true)); + checkIsGeoPoint(v, B(true)); checkDist(D(0.0), v, v); checkLat(idOrLitOrStringVec({"NotAPoint", I(12)}), Ids{U, U}); checkLong(idOrLitOrStringVec({D(4.2), "NotAPoint"}), Ids{U, U}); - checkIsWktPoint(IdOrLiteralOrIri{lit("NotAPoint")}, B(false)); + checkIsGeoPoint(IdOrLiteralOrIri{lit("NotAPoint")}, B(false)); checkDist(U, v, IdOrLiteralOrIri{I(12)}); checkDist(U, IdOrLiteralOrIri{I(12)}, v); checkDist(U, v, IdOrLiteralOrIri{lit("NotAPoint")});