Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Spatial join bounding box #1546

Merged
merged 25 commits into from
Nov 25, 2024
Merged
Show file tree
Hide file tree
Changes from 19 commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
e9ed3e4
add BoundingBox Algorithm
Jonathan24680 Oct 18, 2024
ada2978
Sonarqube issues
Jonathan24680 Nov 3, 2024
eeadcd7
Sonarqube issues
Jonathan24680 Nov 4, 2024
127ba53
dont use cpp 20 version constant of pi
Jonathan24680 Nov 4, 2024
dcb60dd
Merge branch 'master' into SpatialJoinBoundingBox
Jonathan24680 Nov 4, 2024
be09a86
part of first rework
Jonathan24680 Nov 8, 2024
c4ddec6
rework of some parts of the tests
Jonathan24680 Nov 9, 2024
122372b
mr feedback
Jonathan24680 Nov 18, 2024
12f3c13
Merge branch 'master' into SpatialJoinBoundingBox
Jonathan24680 Nov 18, 2024
213b02e
remove second definition of function
Jonathan24680 Nov 19, 2024
ecda249
add new macros AD_LOG vs LOG
joka921 Nov 19, 2024
97d039b
Merge branch 'AD_LOG' of https://github.com/joka921/QLever into AD_LOG
Jonathan24680 Nov 19, 2024
603ec46
Merge branch 'AD_LOG' into SpatialJoinBoundingBox
Jonathan24680 Nov 19, 2024
3831da8
PR feedback
Jonathan24680 Nov 20, 2024
9ffaa0a
Apply the suggestion by Hannah.
joka921 Nov 20, 2024
032eade
Clang-format.
joka921 Nov 20, 2024
d50e2cc
Merge branch 'AD_LOG' of https://github.com/joka921/QLever into AD_LOG
Jonathan24680 Nov 20, 2024
24e31c6
Merge branch 'AD_LOG' into SpatialJoinBoundingBox
Jonathan24680 Nov 20, 2024
8285355
merge log change and fix allocator
Jonathan24680 Nov 20, 2024
56e1618
PR feedback
Jonathan24680 Nov 20, 2024
7d8b98b
pr feedback
Jonathan24680 Nov 21, 2024
89491a1
PR feedback
Jonathan24680 Nov 21, 2024
8743986
merge master into SpatialJoinBoundingBox
Jonathan24680 Nov 22, 2024
23f2ed2
PR feedback
Jonathan24680 Nov 22, 2024
ce5db45
MR feedback
Jonathan24680 Nov 24, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 14 additions & 3 deletions src/engine/SpatialJoin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,7 @@
if (childLeft_ && childRight_) {
size_t inputEstimate =
childLeft_->getSizeEstimate() * childRight_->getSizeEstimate();
if (useBaselineAlgorithm_) {
if (algorithm_ == Algorithm::Baseline) {
return inputEstimate * inputEstimate;
} else {
// Let n be the size of the left table and m the size of the right table.
Expand Down Expand Up @@ -337,10 +337,21 @@
Result SpatialJoin::computeResult([[maybe_unused]] bool requestLaziness) {
SpatialJoinAlgorithms algorithms{_executionContext, prepareJoin(),
addDistToResult_, config_};
if (useBaselineAlgorithm_) {
if (algorithm_ == Algorithm::Baseline) {
return algorithms.BaselineAlgorithm();
} else {
} else if (algorithm_ == Algorithm::S2Geometry) {
return algorithms.S2geometryAlgorithm();
} else if (algorithm_ == Algorithm::BoundingBox) {
// as the BoundingBoxAlgorithms only works for max distance and not for
// nearest neighbors, S2geometry gets called as a backup, if the query is
// asking for the nearest neighbors
if (std::get_if<MaxDistanceConfig>(&config_)) {
return algorithms.BoundingBoxAlgorithm();
} else {
return algorithms.S2geometryAlgorithm();
}
} else {
AD_THROW("choose a valid Algorithm");

Check warning on line 354 in src/engine/SpatialJoin.cpp

View check run for this annotation

Codecov / codecov/patch

src/engine/SpatialJoin.cpp#L354

Added line #L354 was not covered by tests
Jonathan24680 marked this conversation as resolved.
Show resolved Hide resolved
}
}

Expand Down
14 changes: 10 additions & 4 deletions src/engine/SpatialJoin.h
Original file line number Diff line number Diff line change
Expand Up @@ -96,14 +96,20 @@ class SpatialJoin : public Operation {
// purposes
std::optional<size_t> getMaxResults() const;

void selectAlgorithm(bool useBaselineAlgorithm) {
useBaselineAlgorithm_ = useBaselineAlgorithm;
}
// options which can be used for the algorithm, which calculates the result
enum class Algorithm { Baseline, S2Geometry, BoundingBox };

void selectAlgorithm(Algorithm algorithm) { algorithm_ = algorithm; }

std::pair<size_t, size_t> onlyForTestingGetConfig() const {
return std::pair{getMaxDist().value_or(-1), getMaxResults().value_or(-1)};
}

std::variant<NearestNeighborsConfig, MaxDistanceConfig>
onlyForTestingGetActualConfig() const {
return config_;
}

std::shared_ptr<QueryExecutionTree> onlyForTestingGetLeftChild() const {
return childLeft_;
}
Expand Down Expand Up @@ -135,5 +141,5 @@ class SpatialJoin : public Operation {
// between the two objects
bool addDistToResult_ = true;
const string nameDistanceInternal_ = "?distOfTheTwoObjectsAddedInternally";
bool useBaselineAlgorithm_ = false;
Algorithm algorithm_ = Algorithm::BoundingBox;
Jonathan24680 marked this conversation as resolved.
Show resolved Hide resolved
};
283 changes: 283 additions & 0 deletions src/engine/SpatialJoinAlgorithms.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,11 @@
#include <s2/s2point_index.h>
#include <s2/util/units/length-units.h>

#include "math.h"
Jonathan24680 marked this conversation as resolved.
Show resolved Hide resolved
#include "util/GeoSparqlHelpers.h"

using namespace BoostGeometryNamespace;

// ____________________________________________________________________________
SpatialJoinAlgorithms::SpatialJoinAlgorithms(
QueryExecutionContext* qec, PreparedSpatialJoinParams params,
Expand Down Expand Up @@ -223,3 +226,283 @@
return Result(std::move(result), std::vector<ColumnIndex>{},
Result::getMergedLocalVocab(*resultLeft, *resultRight));
}

// ____________________________________________________________________________
std::vector<Box> SpatialJoinAlgorithms::computeBoundingBox(
const Point& startPoint) {
const auto [idTableLeft, resultLeft, idTableRight, resultRight, leftJoinCol,
rightJoinCol, numColumns, maxDist, maxResults] = params_;
if (!maxDist.has_value()) {
AD_THROW("Max distance must have a value for this operation");
}

Check warning on line 237 in src/engine/SpatialJoinAlgorithms.cpp

View check run for this annotation

Codecov / codecov/patch

src/engine/SpatialJoinAlgorithms.cpp#L236-L237

Added lines #L236 - L237 were not covered by tests
Jonathan24680 marked this conversation as resolved.
Show resolved Hide resolved
// haversine function
auto haversine = [](double theta) { return (1 - std::cos(theta)) / 2; };

// inverse haversine function
auto archaversine = [](double theta) { return std::acos(1 - 2 * theta); };

// safety buffer for numerical inaccuracies
double maxDistInMetersBuffer;
if (maxDist.value() < 10) {
maxDistInMetersBuffer = 10;
} else if (static_cast<double>(maxDist.value()) <
static_cast<double>(std::numeric_limits<long long>::max()) /
1.02) {
maxDistInMetersBuffer = 1.01 * static_cast<double>(maxDist.value());
} else {
maxDistInMetersBuffer =
static_cast<double>(std::numeric_limits<long long>::max());
}

Check warning on line 255 in src/engine/SpatialJoinAlgorithms.cpp

View check run for this annotation

Codecov / codecov/patch

src/engine/SpatialJoinAlgorithms.cpp#L253-L255

Added lines #L253 - L255 were not covered by tests

// for large distances, where the lower calculation would just result in
// a single bounding box for the whole planet, do an optimized version
if (static_cast<double>(maxDist.value()) > circumferenceMax_ / 4.0 &&
static_cast<double>(maxDist.value()) < circumferenceMax_ / 2.01) {
return computeUsingAntiBoundingBox(startPoint);
}

// compute latitude bound
double maxDistInDegrees = maxDistInMetersBuffer * (360 / circumferenceMax_);
double upperLatBound = startPoint.get<1>() + maxDistInDegrees;
double lowerLatBound = startPoint.get<1>() - maxDistInDegrees;

auto southPoleReached = isAPoleTouched(lowerLatBound).at(1);
auto northPoleReached = isAPoleTouched(upperLatBound).at(0);

if (southPoleReached || northPoleReached) {
return {Box(Point(-180.0f, lowerLatBound), Point(180.0f, upperLatBound))};
}

// compute longitude bound. For an explanation of the calculation and the
// naming convention see my master thesis
double alpha = maxDistInMetersBuffer / radius_;
double gamma = (90 - std::abs(startPoint.get<1>())) * (2 * M_PI / 360);
joka921 marked this conversation as resolved.
Show resolved Hide resolved
double beta = std::acos(std::cos(gamma) / std::cos(alpha));
double delta = 0;
if (maxDistInMetersBuffer > circumferenceMax_ / 20) {
// use law of cosines
delta = std::acos((std::cos(alpha) - std::cos(gamma) * std::cos(beta)) /
(std::sin(gamma) * std::sin(beta)));
} else {
// use law of haversines for numerical stability
delta = archaversine((haversine(alpha - haversine(gamma - beta))) /
(std::sin(gamma) * std::sin(beta)));
}
Jonathan24680 marked this conversation as resolved.
Show resolved Hide resolved
double lonRange = delta * 360 / (2 * M_PI);
double leftLonBound = startPoint.get<0>() - lonRange;
double rightLonBound = startPoint.get<0>() + lonRange;
// test for "overflows" and create two bounding boxes if necessary
if (leftLonBound < -180) {
auto box1 =
Box(Point(-180, lowerLatBound), Point(rightLonBound, upperLatBound));
auto box2 = Box(Point(leftLonBound + 360, lowerLatBound),
Point(180, upperLatBound));
return {box1, box2};
} else if (rightLonBound > 180) {
auto box1 =
Box(Point(leftLonBound, lowerLatBound), Point(180, upperLatBound));
auto box2 = Box(Point(-180, lowerLatBound),
Point(rightLonBound - 360, upperLatBound));
return {box1, box2};
}
// default case, when no bound has an "overflow"
return {Box(Point(leftLonBound, lowerLatBound),
Point(rightLonBound, upperLatBound))};
}

// ____________________________________________________________________________
std::vector<Box> SpatialJoinAlgorithms::computeUsingAntiBoundingBox(
const Point& startPoint) const {
const auto [idTableLeft, resultLeft, idTableRight, resultRight, leftJoinCol,
rightJoinCol, numColumns, maxDist, maxResults] = params_;
if (!maxDist.has_value()) {
AD_THROW("Max distance must have a value for this operation");
}

Check warning on line 320 in src/engine/SpatialJoinAlgorithms.cpp

View check run for this annotation

Codecov / codecov/patch

src/engine/SpatialJoinAlgorithms.cpp#L319-L320

Added lines #L319 - L320 were not covered by tests
Jonathan24680 marked this conversation as resolved.
Show resolved Hide resolved

// point on the opposite side of the globe
Point antiPoint(startPoint.get<0>() + 180, startPoint.get<1>() * -1);
if (antiPoint.get<0>() > 180) {
antiPoint.set<0>(antiPoint.get<0>() - 360);
}
// for an explanation of the formula see the master thesis. Divide by two two
// only consider the distance from the point to the antiPoint, subtract
// maxDist and a safety margine from that
double antiDist =
(circumferenceMin_ / 2.0) - static_cast<double>(maxDist.value()) * 1.01;
// use the bigger circumference as an additional safety margin, use 2.01
// instead of 2.0 because of rounding inaccuracies in floating point
// operations
double distToAntiPoint = (360 / circumferenceMax_) * (antiDist / 2.01);
double upperBound = antiPoint.get<1>() + distToAntiPoint;
double lowerBound = antiPoint.get<1>() - distToAntiPoint;
double leftBound = antiPoint.get<0>() - distToAntiPoint;
double rightBound = antiPoint.get<0>() + distToAntiPoint;
bool northPoleTouched = false;
bool southPoleTouched = false;
bool boxCrosses180Longitude = false; // if the 180 to -180 line is touched
// if a pole is crossed, ignore the part after the crossing
if (upperBound > 90) {
upperBound = 90;
northPoleTouched = true;
}
if (lowerBound < -90) {
lowerBound = -90;
southPoleTouched = true;
}
if (leftBound < -180) {
leftBound += 360;
}
if (rightBound > 180) {
rightBound -= 360;
}
if (rightBound < leftBound) {
boxCrosses180Longitude = true;
}
// compute bounding boxes using the anti bounding box from above
std::vector<Box> boxes;
if (!northPoleTouched) {
// add upper bounding box(es)
if (boxCrosses180Longitude) {
boxes.emplace_back(Point(leftBound, upperBound), Point(180, 90));
boxes.emplace_back(Point(-180, upperBound), Point(rightBound, 90));
} else {
boxes.emplace_back(Point(leftBound, upperBound), Point(rightBound, 90));
}
}
if (!southPoleTouched) {
// add lower bounding box(es)
if (boxCrosses180Longitude) {
boxes.emplace_back(Point(leftBound, -90), Point(180, lowerBound));
boxes.emplace_back(Point(-180, -90), Point(rightBound, lowerBound));
} else {
boxes.emplace_back(Point(leftBound, -90), Point(rightBound, lowerBound));
}
}
// add the box(es) inbetween the longitude lines
if (boxCrosses180Longitude) {
// only one box needed to cover the longitudes
boxes.emplace_back(Point(rightBound, -90), Point(leftBound, 90));
} else {
// two boxes needed, one left and one right of the anti bounding box
boxes.emplace_back(Point(-180, -90), Point(leftBound, 90));
boxes.emplace_back(Point(rightBound, -90), Point(180, 90));
}
return boxes;
}

// ____________________________________________________________________________
bool SpatialJoinAlgorithms::containedInBoundingBoxes(
const std::vector<Box>& bbox, Point point1) {
convertToNormalCoordinates(point1);

return std::ranges::any_of(bbox, [point1](const Box& aBox) {
return boost::geometry::covered_by(point1, aBox);
});
}

// ____________________________________________________________________________
void SpatialJoinAlgorithms::convertToNormalCoordinates(Point& point) {
// correct lon and lat bounds if necessary
while (point.get<0>() < -180) {
point.set<0>(point.get<0>() + 360);
}
while (point.get<0>() > 180) {
point.set<0>(point.get<0>() - 360);
}
if (point.get<1>() < -90) {
point.set<1>(-90);
} else if (point.get<1>() > 90) {
point.set<1>(90);
}
}

std::array<bool, 2> SpatialJoinAlgorithms::isAPoleTouched(
Jonathan24680 marked this conversation as resolved.
Show resolved Hide resolved
const double& latitude) {
bool northPoleReached = false;
bool southPoleReached = false;
if (latitude >= 90) {
northPoleReached = true;
}
if (latitude <= -90) {
southPoleReached = true;
}
return std::array{northPoleReached, southPoleReached};
}

// ____________________________________________________________________________
Result SpatialJoinAlgorithms::BoundingBoxAlgorithm() {
auto printWarning = []() {
AD_LOG_WARN
<< "expected a point here, but no point is given. Skipping this point"
<< std::endl;
};

Check warning on line 438 in src/engine/SpatialJoinAlgorithms.cpp

View check run for this annotation

Codecov / codecov/patch

src/engine/SpatialJoinAlgorithms.cpp#L435-L438

Added lines #L435 - L438 were not covered by tests
Jonathan24680 marked this conversation as resolved.
Show resolved Hide resolved
Jonathan24680 marked this conversation as resolved.
Show resolved Hide resolved
const auto [idTableLeft, resultLeft, idTableRight, resultRight, leftJoinCol,
rightJoinCol, numColumns, maxDist, maxResults] = params_;
IdTable result{numColumns, qec_->getAllocator()};

// create r-tree for smaller result table
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You have to measure, It might be faster to create the index for the bigger IdTable.
But as everything else is symmetric, you should be able to easily switch this for experiments.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

that's a point, which is already on my list for evaluations. I would postpone it until the evaluation phase if that's fine with you

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Absolutely.

auto smallerResult = idTableLeft;
auto otherResult = idTableRight;
bool leftResSmaller = true;
auto smallerResJoinCol = leftJoinCol;
auto otherResJoinCol = rightJoinCol;
if (idTableLeft->numRows() > idTableRight->numRows()) {
std::swap(smallerResult, otherResult);
leftResSmaller = false;
std::swap(smallerResJoinCol, otherResJoinCol);
}

Check warning on line 453 in src/engine/SpatialJoinAlgorithms.cpp

View check run for this annotation

Codecov / codecov/patch

src/engine/SpatialJoinAlgorithms.cpp#L450-L453

Added lines #L450 - L453 were not covered by tests
Jonathan24680 marked this conversation as resolved.
Show resolved Hide resolved

bgi::rtree<Value, bgi::quadratic<16>> rtree;
Jonathan24680 marked this conversation as resolved.
Show resolved Hide resolved
for (size_t i = 0; i < smallerResult->numRows(); i++) {
// get point of row i
auto geopoint = getPoint(smallerResult, i, smallerResJoinCol);

if (!geopoint) {
printWarning();
continue;
}

Check warning on line 463 in src/engine/SpatialJoinAlgorithms.cpp

View check run for this annotation

Codecov / codecov/patch

src/engine/SpatialJoinAlgorithms.cpp#L461-L463

Added lines #L461 - L463 were not covered by tests

Point p(geopoint.value().getLng(), geopoint.value().getLat());

// add every point together with the row number into the rtree
rtree.insert(std::make_pair(std::move(p), i));
}
for (size_t i = 0; i < otherResult->numRows(); i++) {
auto geopoint1 = getPoint(otherResult, i, otherResJoinCol);

if (!geopoint1) {
printWarning();
continue;
}

Check warning on line 476 in src/engine/SpatialJoinAlgorithms.cpp

View check run for this annotation

Codecov / codecov/patch

src/engine/SpatialJoinAlgorithms.cpp#L474-L476

Added lines #L474 - L476 were not covered by tests

Point p(geopoint1.value().getLng(), geopoint1.value().getLat());

// query the other rtree for every point using the following bounding box
std::vector<Box> bbox = computeBoundingBox(p);
std::vector<Value, ad_utility::AllocatorWithLimit<Value>> results{
qec_->getAllocator()};
Jonathan24680 marked this conversation as resolved.
Show resolved Hide resolved

std::ranges::for_each(bbox, [&](const Box& bbox) {
rtree.query(bgi::intersects(bbox), std::back_inserter(results));
});

std::ranges::for_each(results, [&](const Value& res) {
size_t rowLeft = res.second;
size_t rowRight = i;
if (!leftResSmaller) {
std::swap(rowLeft, rowRight);
}

Check warning on line 494 in src/engine/SpatialJoinAlgorithms.cpp

View check run for this annotation

Codecov / codecov/patch

src/engine/SpatialJoinAlgorithms.cpp#L493-L494

Added lines #L493 - L494 were not covered by tests
auto distance = computeDist(idTableLeft, idTableRight, rowLeft, rowRight,
leftJoinCol, rightJoinCol);
if (distance.getDatatype() == Datatype::Int &&
distance.getInt() <= maxDist) {
Jonathan24680 marked this conversation as resolved.
Show resolved Hide resolved
addResultTableEntry(&result, idTableLeft, idTableRight, rowLeft,
rowRight, distance);
}
});
}
auto resTable =
Result(std::move(result), std::vector<ColumnIndex>{},
Result::getMergedLocalVocab(*resultLeft, *resultRight));
return resTable;
}
Loading
Loading