Skip to content

Commit

Permalink
Merge branch 'dev' into abyrd/nearest-n
Browse files Browse the repository at this point in the history
  • Loading branch information
abyrd authored Sep 25, 2023
2 parents 7513337 + c6d92a9 commit 9b8cf0e
Show file tree
Hide file tree
Showing 18 changed files with 660 additions and 22 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
package com.conveyal.gtfs.error;

import com.conveyal.gtfs.validator.model.Priority;

import java.io.Serializable;

/**
* Indicates that a stop is in a suspect location, for example in a place like a desert where there are not enough
* people to support public transit. This can be the result of incorrect coordinate transforms into WGS84.
* Stops are often located on "null island" at (0,0). This can also happen in other coordinate systems before they
* are transformed to WGS84: the origin of a common French coordinate system is in the Sahara.
*/
public class SuspectStopLocationError extends GTFSError implements Serializable {
public static final long serialVersionUID = 1L;

public SuspectStopLocationError(String stopId, long line) {
super("stops", line, "stop_id", stopId);
}

@Override public String getMessage() {
return String.format(
"Stop with ID %s is in a sparsely populated area (fewer than 5 inhabitants per square km in any " +
"neighboring 1/4 degree cell)",
affectedEntityId
);
}

@Override public Priority getPriority() {
return Priority.MEDIUM;
}
}
2 changes: 1 addition & 1 deletion src/main/java/com/conveyal/gtfs/model/Agency.java
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ protected boolean isRequired() {
@Override
public void loadOneRow() throws IOException {
Agency a = new Agency();
a.sourceFileLine = row + 1; // offset line number by 1 to account for 0-based row index
a.sourceFileLine = row;
a.agency_id = getStringField("agency_id", false); // can only be absent if there is a single agency -- requires a special validator.
a.agency_name = getStringField("agency_name", true);
a.agency_url = getUrlField("agency_url", true);
Expand Down
2 changes: 1 addition & 1 deletion src/main/java/com/conveyal/gtfs/model/Calendar.java
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ public void loadOneRow() throws IOException {
feed.errors.add(new DuplicateKeyError(tableName, row, "service_id", service_id));
} else {
Calendar c = new Calendar();
c.sourceFileLine = row + 1; // offset line number by 1 to account for 0-based row index
c.sourceFileLine = row;
c.service_id = service.service_id;
c.monday = getIntField("monday", true, 0, 1);
c.tuesday = getIntField("tuesday", true, 0, 1);
Expand Down
2 changes: 1 addition & 1 deletion src/main/java/com/conveyal/gtfs/model/CalendarDate.java
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ public void loadOneRow() throws IOException {
feed.errors.add(new DuplicateKeyError(tableName, row, "(service_id, date)", keyString));
} else {
CalendarDate cd = new CalendarDate();
cd.sourceFileLine = row + 1; // offset line number by 1 to account for 0-based row index
cd.sourceFileLine = row;
cd.service_id = service_id;
cd.date = date;
cd.exception_type = getIntField("exception_type", true, 1, 2);
Expand Down
16 changes: 12 additions & 4 deletions src/main/java/com/conveyal/gtfs/model/Entity.java
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,11 @@ public static abstract class Loader<E extends Entity> {
protected final Set<String> missingRequiredColumns = Sets.newHashSet();

protected CsvReader reader;
protected int row;

// Use one-based rows which are easier to understand in error messages.
// Line numbers are conventionally one-based even in IDEs and programming editors.
protected int row = 1;

// TODO "String column" that is set before any calls to avoid passing around the column name

public Loader(GTFSFeed feed, String tableName) {
Expand Down Expand Up @@ -289,18 +293,22 @@ public void loadTable (ZipFile zip) throws IOException {
}
CsvReader reader = new CsvReader(inStream, ',', Charset.forName("UTF8"));
this.reader = reader;
// row number is already positioned at 1
boolean hasHeaders = reader.readHeaders();
if (!hasHeaders) {
feed.errors.add(new EmptyTableError(tableName));
}
while (reader.readRecord()) {
// reader.getCurrentRecord() is zero-based and does not include the header line, keep our own row count
if (++row % 500000 == 0) {
// reader.getCurrentRecord() is zero-based and does not include the header line,
// so we keep our own one-based row count that does include the header line.
// Advance row number before calling loadOneRow so the current value is available inside that function.
row += 1;
if (row % 500000 == 0) {
LOG.info("Record number {}", human(row));
}
loadOneRow(); // Call subclass method to produce an entity from the current row.
}
if (row == 0) {
if (row < 2) {
feed.errors.add(new EmptyTableError(tableName));
}
}
Expand Down
2 changes: 1 addition & 1 deletion src/main/java/com/conveyal/gtfs/model/FareAttribute.java
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ public void loadOneRow() throws IOException {
feed.errors.add(new DuplicateKeyError(tableName, row, "fare_id", fareId));
} else {
FareAttribute fa = new FareAttribute();
fa.sourceFileLine = row + 1; // offset line number by 1 to account for 0-based row index
fa.sourceFileLine = row;
fa.fare_id = fareId;
fa.price = getDoubleField("price", true, 0, Integer.MAX_VALUE);
fa.currency_type = getStringField("currency_type", true);
Expand Down
2 changes: 1 addition & 1 deletion src/main/java/com/conveyal/gtfs/model/FareRule.java
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ public void loadOneRow() throws IOException {

Fare fare = fares.computeIfAbsent(fareId, Fare::new);
FareRule fr = new FareRule();
fr.sourceFileLine = row + 1; // offset line number by 1 to account for 0-based row index
fr.sourceFileLine = row;
fr.fare_id = fare.fare_id;
fr.route_id = getStringField("route_id", false);
fr.origin_id = getStringField("origin_id", false);
Expand Down
2 changes: 1 addition & 1 deletion src/main/java/com/conveyal/gtfs/model/FeedInfo.java
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ protected boolean isRequired() {
@Override
public void loadOneRow() throws IOException {
FeedInfo fi = new FeedInfo();
fi.sourceFileLine = row + 1; // offset line number by 1 to account for 0-based row index
fi.sourceFileLine = row;
fi.feed_id = getStringField("feed_id", false);
fi.feed_publisher_name = getStringField("feed_publisher_name", true);
fi.feed_publisher_url = getUrlField("feed_publisher_url", true);
Expand Down
2 changes: 1 addition & 1 deletion src/main/java/com/conveyal/gtfs/model/Frequency.java
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ protected boolean isRequired() {
public void loadOneRow() throws IOException {
Frequency f = new Frequency();
Trip trip = getRefField("trip_id", true, feed.trips);
f.sourceFileLine = row + 1; // offset line number by 1 to account for 0-based row index
f.sourceFileLine = row;
f.trip_id = trip.trip_id;
f.start_time = getTimeField("start_time", true);
f.end_time = getTimeField("end_time", true);
Expand Down
2 changes: 1 addition & 1 deletion src/main/java/com/conveyal/gtfs/model/Route.java
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ protected boolean isRequired() {
@Override
public void loadOneRow() throws IOException {
Route r = new Route();
r.sourceFileLine = row + 1; // offset line number by 1 to account for 0-based row index
r.sourceFileLine = row;
r.route_id = getStringField("route_id", true);
Agency agency = getRefField("agency_id", false, feed.agency);

Expand Down
2 changes: 1 addition & 1 deletion src/main/java/com/conveyal/gtfs/model/ShapePoint.java
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ public void loadOneRow() throws IOException {
double shape_dist_traveled = getDoubleField("shape_dist_traveled", false, 0D, Double.MAX_VALUE);

ShapePoint s = new ShapePoint(shape_id, shape_pt_lat, shape_pt_lon, shape_pt_sequence, shape_dist_traveled);
s.sourceFileLine = row + 1; // offset line number by 1 to account for 0-based row index
s.sourceFileLine = row;
feed.shape_points.put(new Tuple2<String, Integer>(s.shape_id, s.shape_pt_sequence), s);
}
}
Expand Down
2 changes: 1 addition & 1 deletion src/main/java/com/conveyal/gtfs/model/Stop.java
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ protected boolean isRequired() {
@Override
public void loadOneRow() throws IOException {
Stop s = new Stop();
s.sourceFileLine = row + 1; // offset line number by 1 to account for 0-based row index
s.sourceFileLine = row;
s.stop_id = getStringField("stop_id", true);
s.stop_code = getStringField("stop_code", false);
s.stop_name = getStringField("stop_name", true);
Expand Down
2 changes: 1 addition & 1 deletion src/main/java/com/conveyal/gtfs/model/StopTime.java
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ protected boolean isRequired() {
@Override
public void loadOneRow() throws IOException {
StopTime st = new StopTime();
st.sourceFileLine = row + 1; // offset line number by 1 to account for 0-based row index
st.sourceFileLine = row;
st.trip_id = getStringField("trip_id", true);
// TODO: arrival_time and departure time are not required, but if one is present the other should be
// also, if this is the first or last stop, they are both required
Expand Down
3 changes: 2 additions & 1 deletion src/main/java/com/conveyal/gtfs/model/Transfer.java
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ protected boolean isRequired() {
@Override
public void loadOneRow() throws IOException {
Transfer tr = new Transfer();
tr.sourceFileLine = row + 1; // offset line number by 1 to account for 0-based row index
tr.sourceFileLine = row;
tr.from_stop_id = getStringField("from_stop_id", true);
tr.to_stop_id = getStringField("to_stop_id", true);
tr.transfer_type = getIntField("transfer_type", true, 0, 3);
Expand All @@ -48,6 +48,7 @@ public void loadOneRow() throws IOException {
getRefField("from_trip_id", false, feed.trips);
getRefField("to_trip_id", false, feed.trips);

// row number used as an arbitrary unique string to give MapDB a key.
feed.transfers.put(Long.toString(row), tr);
}

Expand Down
2 changes: 1 addition & 1 deletion src/main/java/com/conveyal/gtfs/model/Trip.java
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ protected boolean isRequired() {
public void loadOneRow() throws IOException {
Trip t = new Trip();

t.sourceFileLine = row + 1; // offset line number by 1 to account for 0-based row index
t.sourceFileLine = row;
t.route_id = getStringField("route_id", true);
t.service_id = getStringField("service_id", true);
t.trip_id = getStringField("trip_id", true);
Expand Down
13 changes: 8 additions & 5 deletions src/main/java/com/conveyal/gtfs/storage/BooleanAsciiGrid.java
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

/**
* Loads an ESRI ASCII grid containing integers and allows looking up values as booleans (where > 0).
* This is used for
* This can be used for a basic stop location test using a simple population raster.
*/
public class BooleanAsciiGrid {

Expand Down Expand Up @@ -51,19 +51,22 @@ public BooleanAsciiGrid (InputStream inputStream, boolean gzipped) {
}

/**
* Get a grid for places where population density is over 5 people per square kilometer.
* Get a grid for places where population density is over 5 people per square kilometer in any neighboring cell.
* We use the Gridded Population of the World v3 data set for 2015 UN-adjusted population density.
* This data set was downloaded at 1/4 degree resolution in ESRI ASCII grid format. The grid file was edited
* manually to eliminate the no-data value header, since I could not find a way to operate on no-value cells in the
* QGIS raster calculator. Then the raster calculator in QGIS was used with the formula ("glds00ag15@1" > 5),
* which makes all cells with population density above the threshold have a value of one,
* and all others a value of zero (since the no data value in the grid is -9999). This was then exported as another
* ASCII grid file, which zips well. The license for this data set is Creative Commons Attribution.
* and all others a value of zero (since the no data value in the grid is -9999). Next, the GRASS r.neighbors tool
* in QGIS was used to blur this layer, adding to each cell the sum of the neighboring eight cells ("neighborhood
* size" of 3, the dimension in each direction). A final logical operation ("blurred@1" > 0) was applied, with the
* result exported as another ASCII grid file, which zips well. The license for this data set is Creative Commons
* Attribution.
* See http://sedac.ciesin.columbia.edu/data/collection/gpw-v3
*/
public static BooleanAsciiGrid forEarthPopulation() {
try {
InputStream gridStream = BooleanAsciiGrid.class.getResourceAsStream("gpwv3-quarter-boolean.asc");
InputStream gridStream = BooleanAsciiGrid.class.getResourceAsStream("gpwv3-quarter-buffer-boolean.asc");
return new BooleanAsciiGrid(gridStream, false);
} catch (Exception ex) {
throw new RuntimeException(ex);
Expand Down
17 changes: 17 additions & 0 deletions src/main/java/com/conveyal/gtfs/validator/PostLoadValidator.java
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@
import com.conveyal.gtfs.error.GeneralError;
import com.conveyal.gtfs.error.RangeError;
import com.conveyal.gtfs.error.ReferentialIntegrityError;
import com.conveyal.gtfs.error.SuspectStopLocationError;
import com.conveyal.gtfs.model.Stop;
import com.conveyal.gtfs.storage.BooleanAsciiGrid;

import java.util.List;

Expand All @@ -33,6 +35,7 @@ public PostLoadValidator (GTFSFeed feed) {
public void validate () {
validateCalendarServices();
validateParentStations();
validateStopPopulationDensity();
}

/**
Expand All @@ -46,6 +49,20 @@ private void validateCalendarServices () {
}
}

/**
* Validate that stops are not in locations with no people. This can happen from incorrect coordinate transforms
* into WGS84. Stops are often located on "null island" at (0,0). This can also happen in other coordinate systems
* before they are transformed to WGS84: the origin of a common French coordinate system is in the Sahara.
*/
private void validateStopPopulationDensity () {
BooleanAsciiGrid popGrid = BooleanAsciiGrid.forEarthPopulation();
for (Stop stop : feed.stops.values()) {
if (!(popGrid.getValueForCoords(stop.stop_lon, stop.stop_lat))) {
feed.errors.add(new SuspectStopLocationError(stop.stop_id, stop.sourceFileLine));
}
}
}

/**
* Validate location_type and parent_station constraints as well as referential integrity.
* Individual validation actions like this could be factored out into separate classes (PostLoadValidators)
Expand Down
Loading

0 comments on commit 9b8cf0e

Please sign in to comment.