Skip to content

Commit

Permalink
Rework data bundle format
Browse files Browse the repository at this point in the history
Better handling of routes
  • Loading branch information
dellisd committed Sep 19, 2023
1 parent f5db7da commit 78a34ad
Show file tree
Hide file tree
Showing 32 changed files with 292 additions and 287 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ import kotlin.io.path.outputStream
class PreprocessCommand : CliktCommand() {
private val logger = LoggerFactory.getLogger(javaClass)

private val source: Path by argument().path(canBeDir = false)
private val source: Path by argument().path(canBeDir = false, mustExist = true)
private val output: Path by option("--output", "-o").path(canBeFile = false).default(Path("data"))
private val name: String by option("--name", "-n").default("gtfs")

Expand All @@ -42,9 +42,9 @@ class PreprocessCommand : CliktCommand() {
val cache = GtfsDb.fromReader(GtfsReader(source), into = cachePath)

logger.info("Bundling data into {}/{}.json", output, name)
val bundler = DataBundler()
val bundler = DataBundler(cache)

val bundle = bundler.assembleDataBundle(cache)
val bundle = bundler.assembleDataBundle()
val bundleFile = output / "$name.json"
bundleFile.outputStream().use { stream ->
Json.encodeToStream(bundle, stream)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,18 @@ package ca.derekellis.reroute.server.data

import ca.derekellis.kgtfs.ExperimentalKgtfsApi
import ca.derekellis.kgtfs.GtfsDb
import ca.derekellis.kgtfs.GtfsDbScope
import ca.derekellis.kgtfs.csv.ServiceId
import ca.derekellis.kgtfs.csv.StopId
import ca.derekellis.kgtfs.csv.Trip
import ca.derekellis.kgtfs.csv.TripId
import ca.derekellis.kgtfs.ext.TripSequence
import ca.derekellis.kgtfs.ext.lineString
import ca.derekellis.kgtfs.ext.uniqueTripSequences
import ca.derekellis.reroute.models.Route
import ca.derekellis.reroute.models.RouteAtStop
import ca.derekellis.reroute.models.RouteVariant
import ca.derekellis.reroute.models.RouteVariantsAtStop
import ca.derekellis.reroute.models.Stop
import ca.derekellis.reroute.models.StopInTimetable
import ca.derekellis.reroute.models.TransitDataBundle
import io.github.dellisd.spatialk.geojson.Position
import org.jetbrains.exposed.sql.select
Expand All @@ -20,9 +23,9 @@ import org.jgrapht.graph.DefaultDirectedGraph
import org.jgrapht.graph.DefaultEdge
import org.jgrapht.traverse.TopologicalOrderIterator

class DataBundler {
@OptIn(ExperimentalKgtfsApi::class)
fun assembleDataBundle(gtfs: GtfsDb): TransitDataBundle = gtfs.query {
@OptIn(ExperimentalKgtfsApi::class)
class DataBundler(private val gtfs: GtfsDb) {
fun assembleDataBundle(): TransitDataBundle = gtfs.query {
val stops = Stops.selectAll().map(Stops.Mapper).associateBy { it.id }
val trips = Trips.selectAll().map(Trips.Mapper).associateBy { it.id }
val calendars = Calendars.selectAll().map { ServiceId(it[Calendars.serviceId]) }.toSet()
Expand All @@ -35,46 +38,78 @@ class DataBundler {
"${it.gtfsId.value}-${trip.directionId}"
}.toList()

val processedRoutes = grouped.flatMap { (key, value) ->
val route = Routes.select { Routes.id eq value.first().gtfsId.value }.map(Routes.mapper).single()
value.mapIndexed { i, sequence ->
val trip = trips.getValue(sequence.trips.keys.first())
val id = "$key#$i"
// TODO: Develop a better way to extract headsign values
val shape = Shapes.select { Shapes.id eq trip.shapeId!!.value }.map(Shapes.Mapper)
Route(
id,
route.id.value,
route.shortName!!,
trip.headsign!!,
trip.directionId!!,
sequence.trips.size,
shape!!.lineString(),
) to sequence.sequence.mapIndexed { index, stopId -> RouteAtStop(stopId.value, id, index) }
}
}

// Build graph of each route+direction and topological ordering
val timetable = grouped.flatMap { (_, tripSequences) ->
val routeId = tripSequences.first().gtfsId
buildSequenceGraph(tripSequences).mapIndexed { i, id -> StopInTimetable(id, routeId.value, i) }
}
val processedRoutes = processRoutes(grouped, trips)

val processedStops = stops.values
.asSequence()
.filter { it.latitude != null && it.name != null }
.map { stop -> stop.copy(name = stop.name?.titleCase()) }
.map { stop ->
val position = stop.longitude?.let { lng -> stop.latitude?.let { lat -> Position(lng, lat) } }
Stop(stop.id.value, stop.code, stop.name!!, position!!)
Stop(stop.id.value, stop.code ?: stop.id.value, stop.name!!, position!!)
}
.toList()

val uniqueRoutes = processedRoutes.map { (route) -> route }.distinctBy { it.gtfsId }
val variants = processedRoutes.flatMap { (_, variants) -> variants }

val groupedVariants = variants.groupBy { it.gtfsId }
val routes = uniqueRoutes.map { route ->
val destinations = groupedVariants.getValue(route.gtfsId)
.groupBy { it.directionId }
.toSortedMap().values
.map { it.maxBy(RouteVariant::weight).headsign }

route.copy(destinations = destinations)
}

TransitDataBundle(
processedStops,
processedRoutes.map { (route) -> route },
processedRoutes.flatMap { (_, stops) -> stops },
timetable,
stops = processedStops,
routes = routes,
routeVariants = variants,
routesAtStops = processedRoutes.flatMap { (_, _, stops) -> stops },
)
}

/**
 * Converts every route+direction group of trip sequences into the models stored in the data bundle.
 *
 * For each entry of [grouped], the route row matching the GTFS id of the group's first sequence is loaded. Every
 * trip sequence in the group then yields one [RouteVariant] (with id `"<key>#<index>"`) and one
 * [RouteVariantsAtStop] per stop of the sequence. The trip under the first key of `sequence.trips` supplies the
 * variant's direction, headsign, and shape.
 *
 * @param grouped Pairs of a group key and the trip sequences belonging to that group.
 * @param trips All trips keyed by their id.
 * @return One [Triple] per group: the [Route], its variants, and the stop orderings of those variants.
 * @throws IllegalStateException if the route has no short name, or if the representative trip of a variant has no
 * shape id, direction id, or headsign.
 */
context(GtfsDbScope)
private fun processRoutes(
    grouped: List<Pair<String, List<TripSequence>>>,
    trips: Map<TripId, Trip>,
) = grouped.map { (key, value) ->
    val route = Routes.select { Routes.id eq value.first().gtfsId.value }.map(Routes.mapper).single()
    // Fail before running the per-variant shape queries when the route itself is unusable.
    val shortName = checkNotNull(route.shortName) { "Route ${route.id.value} has no short name" }

    val variants = mutableListOf<RouteVariant>()
    val sequences = mutableListOf<RouteVariantsAtStop>()

    value.forEachIndexed { i, sequence ->
        val trip = trips.getValue(sequence.trips.keys.first())
        val id = "$key#$i"

        val shapeId = checkNotNull(trip.shapeId) { "Route variant $id: representative trip has no shape id" }
        val directionId = checkNotNull(trip.directionId) {
            "Route variant $id: representative trip has no direction id"
        }
        // TODO: Develop a better way to extract headsign values
        val headsign = checkNotNull(trip.headsign) { "Route variant $id: representative trip has no headsign" }

        val shape = Shapes.select { Shapes.id eq shapeId.value }.map(Shapes.Mapper)

        sequences += sequence.sequence.mapIndexed { index, stopId ->
            RouteVariantsAtStop(stopId.value, id, index)
        }
        variants += RouteVariant(
            id = id,
            gtfsId = route.id.value,
            directionId = directionId,
            headsign = headsign,
            // The number of trips following this exact stop sequence is used as the variant's weight.
            weight = sequence.trips.size,
            shape = shape.lineString(),
        )
    }

    // One destination per direction (ascending direction id): the headsign of the heaviest variant.
    val destinations = variants
        .groupBy { it.directionId }
        .toSortedMap()
        .map { (_, subset) -> subset.maxBy { it.weight }.headsign }

    Triple(
        Route(route.id.value, shortName, destinations),
        variants,
        sequences,
    )
}

Expand All @@ -99,9 +134,9 @@ class DataBundler {
}

val cycleDetector = CycleDetector(graph)
if (cycleDetector.detectCycles()) {
check(!cycleDetector.detectCycles()) {
val sequence = sequences.first()
throw IllegalStateException("Cycle detected in route ${sequence.uniqueId}#${sequence.hash}")
"Cycle detected in route ${sequence.uniqueId}#${sequence.hash}"
}

val iterator = TopologicalOrderIterator(graph)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,11 @@ package ca.derekellis.reroute.server.data
import ca.derekellis.reroute.server.RoutingModule
import ca.derekellis.reroute.server.config.LoadedServerConfig
import ca.derekellis.reroute.server.di.RerouteScope
import io.ktor.http.HttpStatusCode
import io.ktor.server.application.ApplicationCall
import io.ktor.server.application.call
import io.ktor.server.response.lastModified
import io.ktor.server.response.respond
import io.ktor.server.response.respondFile
import io.ktor.server.routing.Routing
import io.ktor.server.routing.get
Expand All @@ -14,30 +16,45 @@ import io.ktor.util.pipeline.PipelineContext
import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.withContext
import me.tatarka.inject.annotations.Inject
import org.slf4j.LoggerFactory
import java.nio.file.Files
import java.nio.file.Path
import java.nio.file.attribute.BasicFileAttributes
import java.time.ZoneId
import java.time.ZonedDateTime
import kotlin.io.path.div
import kotlin.io.path.notExists
import kotlin.io.path.pathString

@Inject
@RerouteScope
class DataRoute(private val config: LoadedServerConfig) : RoutingModule {
private val logger = LoggerFactory.getLogger(javaClass)

// TODO: Parameterize these data filenames
private val filePath = config.dataPath / "gtfs.json"
private val geojsonPath = config.dataPath / "gtfs.geojson"

/**
 * Registers the `data` routes that serve the preprocessed transit data files.
 *
 * `GET data/` serves [filePath] and `GET data/geojson` serves [geojsonPath]. When the requested file does not
 * exist, the handler logs a warning with the path that was searched and responds with `404 Not Found` instead of
 * attempting to send the file.
 */
context(Routing)
override fun route() = route("data") {
    get("/") {
        if (filePath.notExists()) {
            logger.warn("Data file not found! Searched at {}", filePath.pathString)
            call.respond(HttpStatusCode.NotFound)
        } else {
            // lastModifiedOf is declared elsewhere in this class; it is called before the file is sent.
            lastModifiedOf(filePath)
            call.respondFile(filePath.toFile())
        }
    }

    get("/geojson") {
        // Check and report the GeoJSON path itself, not the JSON bundle path.
        if (geojsonPath.notExists()) {
            logger.warn("GeoJSON file not found! Searched at {}", geojsonPath.pathString)
            call.respond(HttpStatusCode.NotFound)
        } else {
            lastModifiedOf(geojsonPath)
            call.respondFile(geojsonPath.toFile())
        }
    }
}

Expand Down
20 changes: 6 additions & 14 deletions shared/src/commonMain/kotlin/ca/derekellis/reroute/models/Route.kt
Original file line number Diff line number Diff line change
@@ -1,26 +1,18 @@
package ca.derekellis.reroute.models

import io.github.dellisd.spatialk.geojson.LineString
import kotlinx.serialization.Serializable

/**
 * A transit route, covering all of its directions. A route can have multiple [variants][RouteVariant].
 *
 * @param gtfsId The id used to reference this route in GTFS data
 * @param identifier The identifier of the route shown to users, typically a route number
 * @param destinations The destinations of the different directions for this route, one entry per direction in
 * ascending order of the GTFS directionId. The index in the list equals the directionId only when the route's
 * direction ids are contiguous and start at 0.
 */
@Serializable
data class Route(
    val gtfsId: String,
    val identifier: String,
    val destinations: List<String>,
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
package ca.derekellis.reroute.models

import io.github.dellisd.spatialk.geojson.LineString
import kotlinx.serialization.Serializable

/**
 * A variant/branch of a [Route]: one distinct stop sequence served by trips of that route in one direction.
 *
 * @param id A unique id for the route variation. The server's data bundler appears to build it as
 * `<gtfsId>-<directionId>#<n>`, where `n` is the position of the variant within its route+direction group.
 * @param gtfsId The GTFS id of the route
 * @param directionId The GTFS direction id for trips on this route variant
 * @param headsign The destination of this particular variant of the route
 * @param weight The number of trips on this variant, used to weight different variants against each other.
 * @param shape The GeoJSON shape of this variant.
 */
@Serializable
data class RouteVariant(
val id: String,
val gtfsId: String,
val directionId: Int,
val headsign: String,
val weight: Int,
val shape: LineString,
)
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,12 @@ import kotlinx.serialization.Serializable
* A mapping between a stop and a route variant that serves that stop.
*
* @param stopId Refers to the ID of a [Stop]
* @param routeVariantId Refers to the ID of a [RouteVariant], not to be confused with the route's [gtfsId][RouteVariant.gtfsId]
* @param index The ordering in which this stop appears in this particular route variation
*/
@Serializable
data class RouteVariantsAtStop(
    // ID of the stop being served.
    val stopId: String,
    // ID of the route variant that serves the stop (a RouteVariant id, not a GTFS route id).
    val routeVariantId: String,
    // Position of the stop within the variant's stop sequence.
    val index: Int,
)
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,22 @@ package ca.derekellis.reroute.models
import io.github.dellisd.spatialk.geojson.Position
import kotlinx.serialization.Serializable

/**
 * A stop or station in a transit system.
 *
 * @param id A unique identifier for this stop, typically corresponding to the id in the GTFS stops table.
 * @param code The code visible to users to reference this stop. If the original GTFS stops table does not contain a
 * code for the stop, the value of this parameter will be equal to [id].
 * @param name The name of the stop or station.
 * @param position The longitude/latitude position of the stop.
 * @param parent A station could have multiple platforms where each [Stop] represents one of the station's platforms.
 * In this case, each platform's parent would be a [Stop] entry for the station as a whole. Defaults to `null`.
 */
@Serializable
data class Stop(
    val id: String,
    val code: String,
    val name: String,
    val position: Position,
    val parent: String? = null,
)
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,15 @@ package ca.derekellis.reroute.models
import kotlinx.serialization.Serializable

/**
 * The ordering of a stop in a specified route variant's timetable
 *
 * NOTE(review): this parameter was previously documented as the GTFS id of a [Route]; confirm that producers of
 * this model populate [routeVariantId] with a [RouteVariant] id.
 *
 * @param stopId Refers to the ID of a [Stop]
 * @param routeVariantId Refers to the ID of a [RouteVariant]
 * @param index The ordering in which this stop appears in the route variant's timetable
 */
@Serializable
data class StopInTimetable(
    val stopId: String,
    val routeVariantId: String,
    val index: Int,
)
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,6 @@ import kotlinx.serialization.Serializable
data class TransitDataBundle(
    // All stops included in the bundle.
    val stops: List<Stop>,
    // One entry per route, carrying the destinations of its directions.
    val routes: List<Route>,
    // Every variant/branch of the routes in [routes].
    val routeVariants: List<RouteVariant>,
    // The ordered stops served by each route variant.
    val routesAtStops: List<RouteVariantsAtStop>,
)
1 change: 0 additions & 1 deletion web/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,6 @@ sqldelight {
create("RerouteDatabase") {
packageName.set("ca.derekellis.reroute.db")
generateAsync.set(true)
deriveSchemaFromMigrations.set(true)
}
}
}
Expand Down
Loading

0 comments on commit 78a34ad

Please sign in to comment.