From c039b03abf6c13e011a1a16f9ecf21b21e44dafb Mon Sep 17 00:00:00 2001 From: Andrew Byrd Date: Thu, 9 Nov 2023 14:28:23 +0800 Subject: [PATCH] Remove S3 Seamless Source. This removes the final aws dependency from r5. Census extractor no longer works and needs to be moved. --- build.gradle | 5 +- .../com/conveyal/analysis/BackendConfig.java | 8 --- .../components/LocalBackendComponents.java | 3 +- .../controllers/DataSourceController.java | 2 +- .../OpportunityDatasetController.java | 6 +-- .../grids/SeamlessCensusGridExtractor.java | 20 +++----- .../conveyal/data/census/CensusExtractor.java | 10 ++-- .../data/census/FileSeamlessSource.java | 5 ++ .../data/census/S3SeamlessSource.java | 50 ------------------- .../conveyal/data/census/SeamlessSource.java | 5 ++ 10 files changed, 31 insertions(+), 83 deletions(-) delete mode 100644 src/main/java/com/conveyal/data/census/S3SeamlessSource.java diff --git a/build.gradle b/build.gradle index 7bc3a4fff..e676e9a4d 100644 --- a/build.gradle +++ b/build.gradle @@ -212,12 +212,9 @@ dependencies { implementation ('org.apache.httpcomponents:httpclient:4.5.6') { because 'Force use of version used by AWS SDK instead of other versions used by our transitive dependencies.' // TODO eventually migrate over to Java's built-in HTTP client. + // TODO check that this remains coherent with other dependencies } - // Persistent storage of files / objects on Amazon S3. - // Now used only for Seamless Census TODO eliminate this final AWS dependency - implementation 'com.amazonaws:aws-java-sdk-s3:1.11.341' - // Commons Math gives us FastMath, MersenneTwister, and low-discrepancy vector generators. 
implementation 'org.apache.commons:commons-math3:3.0' diff --git a/src/main/java/com/conveyal/analysis/BackendConfig.java b/src/main/java/com/conveyal/analysis/BackendConfig.java index 68a747560..856ba00e7 100644 --- a/src/main/java/com/conveyal/analysis/BackendConfig.java +++ b/src/main/java/com/conveyal/analysis/BackendConfig.java @@ -4,7 +4,6 @@ import com.conveyal.analysis.components.LocalWorkerLauncher; import com.conveyal.analysis.components.TaskScheduler; import com.conveyal.analysis.components.broker.Broker; -import com.conveyal.analysis.grids.SeamlessCensusGridExtractor; import com.conveyal.analysis.persistence.AnalysisDB; import com.conveyal.file.LocalFileStorage; import org.slf4j.Logger; @@ -18,7 +17,6 @@ public class BackendConfig extends ConfigBase implements AnalysisDB.Config, Broker.Config, HttpApi.Config, - SeamlessCensusGridExtractor.Config, LocalWorkerLauncher.Config, LocalFileStorage.Config { @@ -36,8 +34,6 @@ public class BackendConfig extends ConfigBase implements private final String localCacheDirectory; private final int serverPort; private final String allowOrigin; - private final String seamlessCensusBucket; - private final String seamlessCensusRegion; private final int lightThreads; private final int heavyThreads; private final int maxWorkers; @@ -64,8 +60,6 @@ protected BackendConfig (Properties properties) { serverPort = intProp("server-port"); offline = boolProp("offline"); allowOrigin = strProp("access-control-allow-origin"); - seamlessCensusBucket = strProp("seamless-census-bucket"); - seamlessCensusRegion = strProp("seamless-census-region"); lightThreads = intProp("light-threads"); heavyThreads = intProp("heavy-threads"); maxWorkers = intProp("max-workers"); @@ -83,8 +77,6 @@ protected BackendConfig (Properties properties) { @Override public String localCacheDirectory() { return localCacheDirectory;} @Override public boolean testTaskRedelivery() { return testTaskRedelivery; } @Override public String allowOrigin() { return 
allowOrigin; } - @Override public String seamlessCensusRegion() { return seamlessCensusRegion; } - @Override public String seamlessCensusBucket() { return seamlessCensusBucket; } @Override public int serverPort() { return serverPort; } @Override public boolean offline() { return offline; } @Override public int maxWorkers() { return maxWorkers; } diff --git a/src/main/java/com/conveyal/analysis/components/LocalBackendComponents.java b/src/main/java/com/conveyal/analysis/components/LocalBackendComponents.java index 4de8e3098..941d273cb 100644 --- a/src/main/java/com/conveyal/analysis/components/LocalBackendComponents.java +++ b/src/main/java/com/conveyal/analysis/components/LocalBackendComponents.java @@ -8,6 +8,7 @@ import com.conveyal.analysis.controllers.LocalFilesController; import com.conveyal.analysis.grids.SeamlessCensusGridExtractor; import com.conveyal.analysis.persistence.AnalysisDB; +import com.conveyal.data.census.FileSeamlessSource; import com.conveyal.file.LocalFileStorage; import com.conveyal.gtfs.GTFSCache; import com.conveyal.r5.streets.OSMCache; @@ -35,7 +36,7 @@ public LocalBackendComponents () { // TODO add nested LocalWorkerComponents here, to reuse some components, and pass it into the LocalWorkerLauncher? workerLauncher = new LocalWorkerLauncher(config, fileStorage, gtfsCache, osmCache); broker = new Broker(config, fileStorage, eventBus, workerLauncher); - censusExtractor = new SeamlessCensusGridExtractor(config); + censusExtractor = new SeamlessCensusGridExtractor(new FileSeamlessSource("cache/census")); // Instantiate the HttpControllers last, when all the components except the HttpApi are already created. 
List httpControllers = standardHttpControllers(); httpControllers.add(new LocalFilesController(fileStorage)); diff --git a/src/main/java/com/conveyal/analysis/controllers/DataSourceController.java b/src/main/java/com/conveyal/analysis/controllers/DataSourceController.java index c61f6a687..0c9fe6e7a 100644 --- a/src/main/java/com/conveyal/analysis/controllers/DataSourceController.java +++ b/src/main/java/com/conveyal/analysis/controllers/DataSourceController.java @@ -142,7 +142,7 @@ private SpatialDataSource downloadLODES(Request req, Response res) { final String regionId = req.params("regionId"); final int zoom = parseZoom(req.queryParams("zoom")); UserPermissions userPermissions = UserPermissions.from(req); - SpatialDataSource source = new SpatialDataSource(userPermissions, extractor.sourceName); + SpatialDataSource source = new SpatialDataSource(userPermissions, extractor.sourceName()); source.regionId = regionId; taskScheduler.enqueue(Task.create("Extracting LODES data") diff --git a/src/main/java/com/conveyal/analysis/controllers/OpportunityDatasetController.java b/src/main/java/com/conveyal/analysis/controllers/OpportunityDatasetController.java index b1afcfc05..4d6a51404 100644 --- a/src/main/java/com/conveyal/analysis/controllers/OpportunityDatasetController.java +++ b/src/main/java/com/conveyal/analysis/controllers/OpportunityDatasetController.java @@ -148,14 +148,14 @@ private OpportunityDatasetUploadStatus downloadLODES (Request req, Response res) // Common UUID for all LODES datasets created in this download (e.g. so they can be grouped together and // deleted as a batch using deleteSourceSet) TODO use DataGroup and DataSource (creating only one DataSource per region). 
// The bucket name contains the specific lodes data set and year so works as an appropriate name - final OpportunityDatasetUploadStatus status = new OpportunityDatasetUploadStatus(regionId, extractor.sourceName); + final OpportunityDatasetUploadStatus status = new OpportunityDatasetUploadStatus(regionId, extractor.sourceName()); addStatusAndRemoveOldStatuses(status); // TODO we should be reusing the same source from Mongo, not making new ephemeral ones on each extract operation - SpatialDataSource source = new SpatialDataSource(userPermissions, extractor.sourceName); + SpatialDataSource source = new SpatialDataSource(userPermissions, extractor.sourceName()); source.regionId = regionId; // Make a new group that will containin the N OpportunityDatasets we're saving. - String description = String.format("Import %s to %s", extractor.sourceName, region.name); + String description = String.format("Import %s to %s", extractor.sourceName(), region.name); DataGroup dataGroup = new DataGroup(userPermissions, source._id.toString(), description); taskScheduler.enqueue(Task.create("Extracting LODES data") diff --git a/src/main/java/com/conveyal/analysis/grids/SeamlessCensusGridExtractor.java b/src/main/java/com/conveyal/analysis/grids/SeamlessCensusGridExtractor.java index 289b9987f..8f6e95c8e 100644 --- a/src/main/java/com/conveyal/analysis/grids/SeamlessCensusGridExtractor.java +++ b/src/main/java/com/conveyal/analysis/grids/SeamlessCensusGridExtractor.java @@ -2,7 +2,7 @@ import com.conveyal.analysis.components.Component; import com.conveyal.analysis.models.Bounds; -import com.conveyal.data.census.S3SeamlessSource; +import com.conveyal.data.census.SeamlessSource; import com.conveyal.data.geobuf.GeobufFeature; import com.conveyal.r5.analyst.Grid; import com.conveyal.r5.analyst.progress.ProgressListener; @@ -31,19 +31,15 @@ public class SeamlessCensusGridExtractor implements Component { "Workers Data creation date" )); - public interface Config { - String seamlessCensusRegion 
(); - String seamlessCensusBucket (); - } - - private final S3SeamlessSource source; + private final SeamlessSource source; - /** A human-readable name for the source of extracted data, e.g. for distinguishing between different years. */ - public final String sourceName; + /** @return a human-readable name for the source of extracted data, e.g. for distinguishing between different years. */ + public final String sourceName () { + return source.name(); + } - public SeamlessCensusGridExtractor (Config config) { - source = new S3SeamlessSource(config.seamlessCensusRegion(), config.seamlessCensusBucket()); - sourceName = config.seamlessCensusBucket(); + public SeamlessCensusGridExtractor (SeamlessSource source) { + this.source = source; } /** diff --git a/src/main/java/com/conveyal/data/census/CensusExtractor.java b/src/main/java/com/conveyal/data/census/CensusExtractor.java index 86918c2d7..08f979424 100644 --- a/src/main/java/com/conveyal/data/census/CensusExtractor.java +++ b/src/main/java/com/conveyal/data/census/CensusExtractor.java @@ -16,7 +16,7 @@ import java.util.Map; /** - * Extract Census data from a seamless datastore. + * Extract Census data from a seamless datastore to local files. Only file-backed sources are supported; s3:// sources are rejected because S3 support is no longer in this repo. */ public class CensusExtractor { /** @@ -34,10 +34,12 @@ public static void main (String... 
args) throws IOException { } SeamlessSource source; - if (!args[0].startsWith("s3://")) + if (!args[0].startsWith("s3://")) { source = new FileSeamlessSource(args[0]); - else - source = new S3SeamlessSource(args[0].substring(5)); + } else { + source = null; // new S3SeamlessSource(args[0].substring(5)); + throw new UnsupportedOperationException("S3 support is no longer in this repo."); + } long start = System.currentTimeMillis(); diff --git a/src/main/java/com/conveyal/data/census/FileSeamlessSource.java b/src/main/java/com/conveyal/data/census/FileSeamlessSource.java index c8a17cab9..3355f1110 100644 --- a/src/main/java/com/conveyal/data/census/FileSeamlessSource.java +++ b/src/main/java/com/conveyal/data/census/FileSeamlessSource.java @@ -15,6 +15,11 @@ public FileSeamlessSource(String path) { this.directory = new File(path); } + @Override + public String name() { + return directory.getName(); + } + @Override protected InputStream getInputStream(int x, int y) throws IOException { File dir = new File(directory, x + ""); File file = new File(dir, y + ".pbf.gz"); diff --git a/src/main/java/com/conveyal/data/census/S3SeamlessSource.java b/src/main/java/com/conveyal/data/census/S3SeamlessSource.java deleted file mode 100644 index 08ab8a35e..000000000 --- a/src/main/java/com/conveyal/data/census/S3SeamlessSource.java +++ /dev/null @@ -1,50 +0,0 @@ -package com.conveyal.data.census; - -import com.amazonaws.services.s3.AmazonS3; -import com.amazonaws.services.s3.AmazonS3ClientBuilder; -import com.amazonaws.services.s3.model.AmazonS3Exception; -import com.amazonaws.services.s3.model.GetObjectRequest; - -import java.io.IOException; -import java.io.InputStream; - -/** - * A seamless data source based on storage in Amazon S3. 
- */ -public class S3SeamlessSource extends SeamlessSource { - private static AmazonS3 s3; - - public final String region; - public final String bucketName; - - public S3SeamlessSource(String bucketName) { - this.region = null; - this.bucketName = bucketName; - this.s3 = AmazonS3ClientBuilder.defaultClient(); - } - - public S3SeamlessSource(String region, String bucketName) { - this.region = region; - this.bucketName = bucketName; - this.s3 = AmazonS3ClientBuilder.standard() - .withRegion(region) - .build(); - } - - @Override - protected InputStream getInputStream(int x, int y) throws IOException { - try { - GetObjectRequest req = new GetObjectRequest(bucketName, String.format("%d/%d.pbf.gz", x, y)); - // the LODES bucket is requester-pays. - req.setRequesterPays(true); - return s3.getObject(req).getObjectContent(); - } catch (AmazonS3Exception e) { - // there is no data in this tile - if ("NoSuchKey".equals(e.getErrorCode())) - return null; - else - // re-throw, something else is amiss - throw e; - } - } -} diff --git a/src/main/java/com/conveyal/data/census/SeamlessSource.java b/src/main/java/com/conveyal/data/census/SeamlessSource.java index 7d912eccd..e47e78580 100644 --- a/src/main/java/com/conveyal/data/census/SeamlessSource.java +++ b/src/main/java/com/conveyal/data/census/SeamlessSource.java @@ -34,6 +34,11 @@ public abstract class SeamlessSource { private static final GeometryFactory geometryFactory = new GeometryFactory(); + /** + * @return A human-readable name for the source of extracted data, e.g. for distinguishing between different years. + */ + public abstract String name(); + /** Extract features by bounding box */ public Map extract( double north, double east, double south, double west, boolean onDisk, ProgressListener progressListener