Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Revise Simpson Desert tests #896

Merged
merged 9 commits into from
Nov 2, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 7 additions & 4 deletions src/main/java/com/conveyal/r5/analyst/TravelTimeComputer.java
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@

import static com.conveyal.r5.analyst.scenario.PickupWaitTimes.NO_SERVICE_HERE;
import static com.conveyal.r5.analyst.scenario.PickupWaitTimes.NO_WAIT_ALL_STOPS;
import static com.conveyal.r5.common.Util.isNullOrEmpty;
import static com.conveyal.r5.profile.PerTargetPropagater.MM_PER_METER;

/**
Expand Down Expand Up @@ -77,19 +78,21 @@ public OneOriginResult computeTravelTimes() {

// Find the set of destinations for a travel time calculation, not yet linked to the street network, and with
// no associated opportunities. By finding the extents and destinations up front, we ensure the exact same
// destination pointset is used for all steps below.
// destination PointSet is used for all steps below.
// This reuses the logic for finding the appropriate grid size and linking, which is now in the NetworkPreloader.
// We could change the preloader to retain these values in a compound return type, to avoid repetition here.
PointSet destinations;

if (request instanceof RegionalTask
&& !request.makeTauiSite
&& request.destinationPointSets[0] instanceof FreeFormPointSet
) {
// Freeform; destination pointset was set by handleOneRequest in the main AnalystWorker
// Freeform destinations. Destination PointSet was set by handleOneRequest in the main AnalystWorker.
destinations = request.destinationPointSets[0];
} else if (!isNullOrEmpty(request.destinationPointSets)) {
LOG.warn("ONLY VALID IN TESTING: Using PointSet object embedded in request outside regional analysis.");
destinations = request.destinationPointSets[0];
} else {
// Gridded (non-freeform) destinations. The extents are found differently in regional and single requests.
// Gridded (non-freeform) destinations. This method finds them differently for regional and single requests.
WebMercatorExtents destinationGridExtents = request.getWebMercatorExtents();
// Make a WebMercatorGridPointSet with the right extents, referring to the network's base grid and linkage.
destinations = AnalysisWorkerTask.gridPointSetCache.get(destinationGridExtents, network.fullExtentGridPointSet);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,12 @@ of travel time (e.g. waiting) and paths should also be returned.
REGIONAL_ANALYSIS
}

/**
* WARNING This whole tree of classes contains non-primitive compound fields. Cloning WILL NOT DEEP COPY these
* fields. Modifying some aspects of the cloned object may modify the same aspects of the one it was cloned from.
* Unfortunately these classes have a large number of fields and maintaining hand written copy constructors for
* them might be an even greater liability than carefully choosing how to use clone().
*/
public AnalysisWorkerTask clone () {
// no need to catch CloneNotSupportedException, it's caught in ProfileRequest::clone
return (AnalysisWorkerTask) super.clone();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,12 @@ public WebMercatorExtents getWebMercatorExtents() {
}
}

/**
* WARNING This whole tree of classes contains non-primitive compound fields. Cloning WILL NOT DEEP COPY these
* fields. Modifying some aspects of the cloned object may modify the same aspects of the one it was cloned from.
* Unfortunately these classes have a large number of fields and maintaining hand written copy constructors for
* them might be an even greater liability than carefully choosing how to use clone().
*/
public RegionalTask clone () {
return (RegionalTask) super.clone();
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,17 @@
package com.conveyal.r5.analyst.cluster;

import com.conveyal.r5.analyst.FreeFormPointSet;
import com.conveyal.r5.analyst.PointSet;
import com.conveyal.r5.analyst.WebMercatorExtents;
import com.conveyal.r5.profile.ProfileRequest;
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.lang.invoke.MethodHandles;

import static com.conveyal.r5.common.Util.isNullOrEmpty;
import static com.google.common.base.Preconditions.checkState;

/**
* Instances are serialized and sent from the backend to workers processing single point,
Expand All @@ -13,6 +23,8 @@
*/
public class TravelTimeSurfaceTask extends AnalysisWorkerTask {

private static final Logger LOG = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());

// FIXME red flag - what is this enum enumerating Java types?

@Override
Expand Down Expand Up @@ -48,8 +60,22 @@ public WebMercatorExtents getWebMercatorExtents() {

@Override
public int nTargetsPerOrigin () {
// In TravelTimeSurfaceTasks, the set of destinations is always determined by the web mercator extents.
return width * height;
// In TravelTimeSurfaceTasks, the set of destinations is always determined by the web mercator extents in the
// request. A single WebMercatorGridPointSet is created with those extents. A single small freeform PointSet may
// also be present, but only in testing situations. The checkState assertions serve to verify assumptions that
// destinationPointSets are always set and we can polymorphically fetch the number of items for all normal and
// testing use cases. Once that is clearly working the assertions could be removed.
checkState(!isNullOrEmpty(destinationPointSets), "Expected destination PointSets to be present.");
checkState(destinationPointSets.length == 1, "Expected a single destination PointSet in TravelTimeSurfaceTask.");
PointSet destinations = destinationPointSets[0];
int nFeatures = destinations.featureCount();
if (destinations instanceof FreeFormPointSet) {
LOG.warn("Should currently be used only in testing: FreeFormPointSet specified in TravelTimeSurfaceTask.");
checkState(nFeatures == 1);
} else {
checkState(nFeatures == width * height);
}
return nFeatures;
}

}
12 changes: 4 additions & 8 deletions src/main/java/com/conveyal/r5/profile/FastRaptorWorker.java
Original file line number Diff line number Diff line change
Expand Up @@ -55,12 +55,6 @@ public class FastRaptorWorker {
*/
public static final int UNREACHED = Integer.MAX_VALUE;

/**
* Minimum time between alighting from one vehicle and boarding another, in seconds.
* TODO make this configurable, and use loop-transfers from transfers.txt.
*/
public static final int BOARD_SLACK_SECONDS = 60;

public static final int SECONDS_PER_MINUTE = 60;

/**
Expand All @@ -70,8 +64,10 @@ public class FastRaptorWorker {
private static final int DEPARTURE_STEP_SEC = 60;

/**
* Minimum wait for boarding to account for schedule variation.
* FIXME clarify why this is separate from BOARD_SLACK. If it is not, merge the two constants into BOARD_SLACK_SEC.
* To be considered for boarding, a vehicle must depart at least this long after the rider arrives at the stop.
* Intuitively, "leave this long for transfers" to account for schedule variation or other unexpected variations.
* This is separate from BOARD_SLACK_SECONDS used in McRaptor and point-to-point searches, in case someone needs
* to set them independently.
*/
private static final int MINIMUM_BOARD_WAIT_SEC = 60;

Expand Down
12 changes: 10 additions & 2 deletions src/main/java/com/conveyal/r5/profile/PathWithTimes.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,14 @@
* A path that also includes itineraries and bounds for all possible trips on the paths (even those which may not be optimal)
*/
public class PathWithTimes extends Path {

/**
* Minimum time between alighting from one vehicle and boarding another, in seconds.
* Appears to be used only in McRaptor and point-to-point searches.
* TODO make this configurable, and use loop-transfers from transfers.txt.
*/
public static final int BOARD_SLACK_SECONDS = 60;

/** Stats for the entire path */
public Stats stats;

Expand Down Expand Up @@ -92,7 +100,7 @@ private void computeTimes (TransportNetwork network, ProfileRequest req, TIntInt
// loop over departures within the time window
// firstTrip is the trip on the first pattern
int firstTrip = 0;
while (times[0][firstTrip][0] < req.fromTime + accessTime + FastRaptorWorker.BOARD_SLACK_SECONDS) firstTrip++;
while (times[0][firstTrip][0] < req.fromTime + accessTime + BOARD_SLACK_SECONDS) firstTrip++;

// now interleave times
double walkSpeedMillimetersPerSecond = req.walkSpeed * 1000;
Expand Down Expand Up @@ -137,7 +145,7 @@ private void computeTimes (TransportNetwork network, ProfileRequest req, TIntInt

// TODO should board slack be applied at the origin stop? Is this done in RaptorWorker?
// See also below in computeStatistics
time = times[patIdx][trip][1] + transferTime + FastRaptorWorker.BOARD_SLACK_SECONDS;
time = times[patIdx][trip][1] + transferTime + BOARD_SLACK_SECONDS;
itin.arriveAtBoardStopTimes[patIdx + 1] = time;
}
}
Expand Down
6 changes: 6 additions & 0 deletions src/main/java/com/conveyal/r5/profile/ProfileRequest.java
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,12 @@ public class ProfileRequest implements Serializable, Cloneable {
*/
public int monteCarloDraws = 220;

/**
* WARNING This whole tree of classes contains non-primitive compound fields. Cloning WILL NOT DEEP COPY these
* fields. Modifying some aspects of the cloned object may modify the same aspects of the one it was cloned from.
* Unfortunately these classes have a large number of fields and maintaining hand written copy constructors for
* them might be an even greater liability than carefully choosing how to use clone().
*/
public ProfileRequest clone () {
try {
return (ProfileRequest) super.clone();
Expand Down
2 changes: 1 addition & 1 deletion src/main/java/com/conveyal/r5/profile/StatsCalculator.java
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ public static StatsCollection computeStatistics (ProfileRequest req, int accessT

for (int start = req.fromTime; start < req.toTime; start += 60) {
// TODO should board slack be applied at the origin stop? Is this done in RaptorWorker?
int timeAtOriginStop = start + accessTime + FastRaptorWorker.BOARD_SLACK_SECONDS;
int timeAtOriginStop = start + accessTime + PathWithTimes.BOARD_SLACK_SECONDS;
int bestTimeAtDestinationStop = Integer.MAX_VALUE;

PathWithTimes.Itinerary bestItinerary = null;
Expand Down
81 changes: 77 additions & 4 deletions src/test/java/com/conveyal/r5/analyst/network/Distribution.java
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
package com.conveyal.r5.analyst.network;

import com.conveyal.r5.analyst.cluster.TravelTimeResult;
import com.google.common.base.Preconditions;

import java.util.Arrays;

import static java.lang.Math.pow;
import static java.lang.Math.sqrt;
import static org.junit.jupiter.api.Assertions.assertTrue;

/**
Expand Down Expand Up @@ -90,6 +93,10 @@ public void normalize () {
}
}

/**
* Print a text-based representation of the distribution to standard out.
* There is another method to show the distribution in a graphical plot window.
*/
public void illustrate () {
final int width = 50;
double max = Arrays.stream(masses).max().getAsDouble();
Expand All @@ -102,6 +109,12 @@ public void illustrate () {
}
}

/**
* Given a percentile such as 25 or 50, find the x bin at which that percentile is situated in this Distribution,
* i.e. the lowest (binned or discretized) x value for which the cumulative probability is at least percentile.
* In common usage: find the lowest whole-minute travel time for which the cumulative probability is greater than
* the supplied percentile.
*/
public int findPercentile (int percentile) {
double sum = 0;
double threshold = percentile / 100d;
Expand All @@ -123,6 +136,10 @@ public static void main (String[] args) {
out.illustrate();
}

/**
* @return the probability mass situated at a particular x value (the probability density for a particular minute
* when these are used in the usual way as 1-minute bins).
*/
public double probabilityOf (int x) {
if (x < skip) {
return 0;
Expand Down Expand Up @@ -200,10 +217,18 @@ public static Distribution fromTravelTimeResult (TravelTimeResult travelTimeResu
}

/**
* Find the probability mass of the overlapping region of the two distributions. The amount of "misplaced"
* probability is one minus overlap. Overlap is slightly more straightforward to calculate directly than mismatch.
* Find the probability mass of the overlapping region of the two distributions. This can be used to determine
* whether two distributions, often a theoretical one and an observed one, are sufficiently similar to one another.
* Overlapping here means in both dimensions, travel time (horizontal) and probability density (vertical).
* Proceeding bin by bin through both distributions in parallel, the smaller of the two values for each bin is
* accumulated into the total. The amount of "misplaced" probability (located in the wrong bin in the observed
* distribution relative to the theoretical one) is one minus overlap. Overlap is slightly more straightforward
* to calculate directly than mismatch. This method is not sensitive to how evenly the error is distributed
* across the domain. We should prefer using a measure that emphasizes larger errors and compensates for the
* magnitude of the predicted values.
*/
public double overlap (Distribution other) {
// TODO This min is not necessary. The overlap is by definition fully within the domain of either Distribution.
int iMin = Math.min(this.skip, other.skip);
int iMax = Math.max(this.fullWidth(), other.fullWidth());
double sum = 0;
Expand All @@ -216,12 +241,45 @@ public double overlap (Distribution other) {
return sum;
}

/**
* An ad-hoc measure of goodness of fit vaguely related to Pearson's chi-squared or root-mean-square error.
* Traditional measures used in fitting probability distributions like Pearson's have properties that deal poorly
* with our need to tolerate small horizontal shifts
* in the results (due to the destination grid being not precisely aligned with our street corner grid).
* Another way to deal with this would be to ensure there is no horizontal shift, by measuring travel times at
* exactly the right places instead of on a grid.
*/
public double weightedSquaredError (Distribution other) {
double sum = 0;
// This is kind of ugly because we're only examining bins with nonzero probability (to avoid div by zero).
// Observed data in a region with predicted zero probability should be an automatic fail for the model.
for (int i = this.skip; i < this.fullWidth(); i++) {
double pe = this.probabilityOf(i);
double po = other.probabilityOf(i);
Preconditions.checkState(pe >= 0); // Ensure non-negative for good measure.
if (pe == 0) {
System.out.println("Zero (expected probability; skipping.");
continue;
}
// Errors are measured relative to the expected values, and stronger deviations emphasized by squaring.
// Measuring relative to expected density compensates for the case where it is spread over a wider domain.
sum += pow(po - pe, 2) / pe;
}
System.out.println("Squared error: " + sum);
System.out.println("Root Squared error: " + sqrt(sum));
return sum;
}

public void assertSimilar (Distribution observed) {
double squaredError = this.weightedSquaredError(observed);
showChartsIfEnabled(observed);
assertTrue(squaredError < 0.025, String.format("Error metric too high at at %3f", squaredError));
}

public void showChartsIfEnabled (Distribution observed) {
if (SHOW_CHARTS) {
DistributionChart.showChart(this, observed);
}
double overlapPercent = this.overlap(observed) * 100;
assertTrue(overlapPercent >= 95, String.format("Overlap less than 95%% at %3f", overlapPercent));
}

// This is ugly, it should be done some other way e.g. firstNonzero
Expand Down Expand Up @@ -249,4 +307,19 @@ public void trim () {
}
masses = Arrays.copyOfRange(masses, firstNonzero, lastNonzero + 1);
}

/**
* Here we are performing two related checks for a bit of redundancy and to check different parts of the system:
* checking percentiles drawn from the observed distribution, as well as the full histogram of the distribution.
* This double comparison could be done automatically with a method like Distribution.assertSimilar(TravelTimeResult).
* @param destination the flattened 1D index into the pointset, which will be zero for single freeform points.
*/
public void multiAssertSimilar(TravelTimeResult travelTimes, int destination) {
// Check a goodness-of-fit metric on the observed distribution relative to this distribution.
Distribution observed = Distribution.fromTravelTimeResult(travelTimes, destination);
this.assertSimilar(observed);
// Check that percentiles extracted from observed are similar to those predicted by this distribution.
int[] travelTimePercentiles = travelTimes.getTarget(destination);
DistributionTester.assertExpectedDistribution(this, travelTimePercentiles);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -61,10 +61,13 @@ public JFreeChart createChart (Distribution... distributions) {
return chart;
}

// Note that the points are placed at the minute boundary, though the numbers represent densities over one minute.
// They should probably be represented as filled bars across the minute or as points midway across the minute.
private static TimeSeriesCollection createTimeSeriesDataset (Distribution... distributions) {
TimeSeriesCollection dataset = new TimeSeriesCollection();
int d = 0;
for (Distribution distribution : distributions) {
TimeSeries ts = new TimeSeries("X");
TimeSeries ts = new TimeSeries("Distribution " + (d++));
for (int i = distribution.skip(); i < distribution.fullWidth(); i++) {
double p = distribution.probabilityOf(i);
ts.add(new Minute(i, 0, 1, 1, 2000), p);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,11 @@ public static void assertUniformlyDistributed (int[] sortedPercentiles, int min,
}
}

/**
* Given an expected distribution of travel times at a destination and the standard five percentiles of travel time
* at that same destination as computed by our router, check that the computed values seem to be drawn from the
* theoretically correct distribution.
*/
public static void assertExpectedDistribution (Distribution expectedDistribution, int[] values) {
for (int p = 0; p < PERCENTILES.length; p++) {
int expected = expectedDistribution.findPercentile(PERCENTILES[p]);
Expand Down
Loading