Skip to content

Commit

Permalink
Merge pull request #405 from DOI-USGS/nhdplushr
Browse files Browse the repository at this point in the history
Nhdplushr
  • Loading branch information
dblodgett-usgs authored Jul 29, 2024
2 parents c7c83ef + a609fa7 commit ccdd3ef
Show file tree
Hide file tree
Showing 6 changed files with 142 additions and 49 deletions.
2 changes: 2 additions & 0 deletions R/A_nhdplusTools.R
Original file line number Diff line number Diff line change
Expand Up @@ -259,10 +259,12 @@ assign("default_nhdplus_path", default_nhdplus_path, envir = nhdplusTools_env)

nhd_bucket <- "https://prd-tnm.s3.amazonaws.com/"
nhdhr_file_list <- "?prefix=StagedProducts/Hydrography/NHDPlusHR/VPU/Current/GDB/"
archive_nhdhr_file_list <- "?prefix=StagedProducts/Hydrography/NHDPlusHR/VPU/Archive/GDB/"
nhd_file_list <- "?prefix=StagedProducts/Hydrography/NHD/HU4/GDB/"

assign("nhd_bucket", nhd_bucket, envir = nhdplusTools_env)
assign("nhdhr_file_list", nhdhr_file_list, envir = nhdplusTools_env)
assign("archive_nhdhr_file_list", archive_nhdhr_file_list, envir = nhdplusTools_env)

assign("nldi_tier", "prod",
envir = nhdplusTools_env)
Expand Down
15 changes: 13 additions & 2 deletions R/downloading_tools.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,11 @@
#' and four digit codes.
#' @param download_files boolean if FALSE, only URLs to files will be returned
#' can be hu02s and/or hu04s
#' @param archive pull data from the "archive" folder rather than "current".
#' The archive contains the original releases of NHDPlusHR data that were updated
#' in subsequent processing. Not all subsets of NHDPlusHR were updated. See:
#' https://www.usgs.gov/national-hydrography/access-national-hydrography-products
#' for more details.
#'
#' @return character Paths to geodatabases created.
#' @export
Expand All @@ -16,11 +21,17 @@
#' (hu <- substr(hu$huc8, 1, 2))
#'
#' download_nhdplushr(tempdir(), c(hu, "0203"), download_files = FALSE)
#'
#' download_nhdplushr(tempdir(), c(hu, "0203"), download_files = FALSE, archive = TRUE)
#' }
download_nhdplushr <- function(nhd_dir, hu_list, download_files = TRUE) {
download_nhdplushr <- function(nhd_dir, hu_list, download_files = TRUE, archive = FALSE) {

list_source <- get("nhdhr_file_list", envir = nhdplusTools_env)

if(archive) list_source <- get("archive_nhdhr_file_list", envir = nhdplusTools_env)

download_nhd_internal(get("nhd_bucket", envir = nhdplusTools_env),
get("nhdhr_file_list", envir = nhdplusTools_env),
list_source,
"NHDPLUS_H_", nhd_dir, hu_list, download_files)
}

Expand Down
9 changes: 9 additions & 0 deletions R/get_nhdplushr.R
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,15 @@ get_nhdplushr <- function(hr_dir, out_gpkg = NULL,
gdbs <- list.files(hr_dir, pattern = "sub.gpkg", full.names = TRUE)
}

dup_list_key <- regmatches(basename(gdbs), regexpr("[0-9][0-9][0-9][0-9]", basename(gdbs)))

if(any(duplicated(dup_list_key))) {
remove <- gdbs[duplicated(dup_list_key)]
warning("Found duplicate HU04s in nhdplushr directory? Will not use: \n",
paste(remove, collapse = "\n"))
gdbs <- gdbs[!duplicated(dup_list_key)]
}

if(is.null(layers)) {
layers <- st_layers(gdbs[1])

Expand Down
10 changes: 9 additions & 1 deletion man/download_nhdplushr.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

11 changes: 11 additions & 0 deletions tests/testthat/helper.R
Original file line number Diff line number Diff line change
Expand Up @@ -39,3 +39,14 @@ check_layers <- function(out_file) {
expect_true(sf::st_crs(sf::read_sf(out_file, "NHDFlowline_Network")) ==
sf::st_crs(4269))
}

setup_workdir <- function() {
work_dir <- file.path(tempdir(), "test_hr")
dir.create(work_dir, recursive = TRUE, showWarnings = FALSE)
out_gpkg <- file.path(work_dir, "temp.gpkg")
list(wd = work_dir, og = out_gpkg)
}

teardown_workdir <- function(work_dir) {
unlink(work_dir, recursive = TRUE, force = TRUE)
}
144 changes: 98 additions & 46 deletions tests/testthat/test_get_nhdplushr.R
Original file line number Diff line number Diff line change
@@ -1,157 +1,209 @@


work_dir <- file.path(tempdir(), "test_hr")
dir.create(work_dir, recursive = TRUE, showWarnings = FALSE)
out_gpkg <- file.path(work_dir, "temp.gpkg")

test_that("we get urls for nhdplushr and base", {
skip_on_cran()
urls <- download_nhdplushr(work_dir, c("01", "0203"), download_files = FALSE)

sw <- setup_workdir()

urls <- download_nhdplushr(sw$wd, c("01", "0203"), download_files = FALSE)

expect_equal(length(urls), 11)

urls <- download_nhd(work_dir, c("01", "0203"), download_files = FALSE)
urls <- download_nhdplushr(sw$wd, c("01", "0203"), download_files = FALSE, archive = TRUE)

expect_true(all(grepl("Archive", urls)))

urls <- download_nhd(sw$wd, c("01", "0203"), download_files = FALSE)

expect_equal(length(urls), 11)

teardown_workdir(sw$wd)
})

test_that("get_nhdplushr layers and gpkg", {
skip_on_cran()

get_test_file(work_dir)
sw <- setup_workdir()

get_test_file(sw$wd)

out <- get_nhdplushr(work_dir, out_gpkg = out_gpkg)
out <- get_nhdplushr(sw$wd, out_gpkg = sw$og)

layers <- sf::st_layers(out_gpkg)
layers <- sf::st_layers(sw$og)
expect_equal(layers$name, c("NHDFlowline", "NHDPlusCatchment"))
expect_equal(layers$features, c(2691, 2603))
expect_equal(layers$features, c(nrow(out[[1]]), nrow(out[[2]])))
expect_equal(layers$name, names(out))

out <- get_nhdplushr(work_dir, out_gpkg = out_gpkg,
out <- get_nhdplushr(sw$wd, out_gpkg = sw$og,
layers = NULL, overwrite = TRUE)

layers <- sf::st_layers(out_gpkg)
layers <- sf::st_layers(sw$og)

expect_equal(length(layers$name), 7)
expect_equal(layers$fields[which(layers$name == "NHDFlowline")], 57)

out <- get_nhdplushr(work_dir, layers = NULL)
out <- get_nhdplushr(sw$wd, layers = NULL)

expect(length(names(out)), 7)

teardown_workdir(sw$wd)
})

test_that("get_nhdplushr duplicate vpus", {
skip_on_cran()

sw <- setup_workdir()

get_test_file(sw$wd)

f <- file.path(sw$wd, "03_sub.gpkg")
ftemp <- file.path(sw$wd, "03.gpkg")
f1 <- file.path(sw$wd, "0303_sub.gpkg")
f2 <- file.path(sw$wd, "0303_2sub.gpkg")

file.copy(f, f1)
file.copy(f, f2)

file.rename(f, ftemp)

expect_warning(out <- get_nhdplushr(sw$wd, out_gpkg = sw$og),
"Found duplicate HU04s")

file.rename(ftemp, f)
unlink(f1)
unlink(f2)

teardown_workdir(sw$wd)
})

test_that("nhdplus hr waterbody", {
skip_on_cran()

get_test_file(work_dir)
sw <- setup_workdir()

out <- get_nhdplushr(work_dir, out_gpkg = out_gpkg)
get_test_file(sw$wd)

out <- get_nhdplushr(work_dir, layers = c("NHDFlowline",
out <- get_nhdplushr(sw$wd, out_gpkg = sw$og)

out <- get_nhdplushr(sw$wd, layers = c("NHDFlowline",
"NHDWaterbody"),
out_gpkg = out_gpkg)
out_gpkg = sw$og)

wb <- out$NHDWaterbody[out$NHDWaterbody$Permanent_Identifier == 46376571,]

expect_equal(get_wb_outlet(wb$Permanent_Identifier, out$NHDFlowline)$Permanent_Identifier,
"46338320")

teardown_workdir(sw$wd)
})

test_that("get_nhdplushr overwrite gpkg and pattern", {
skip_on_cran()

get_test_file(work_dir)
sw <- setup_workdir()

out <- get_nhdplushr(work_dir, out_gpkg = out_gpkg)
get_test_file(sw$wd)

out <- get_nhdplushr(sw$wd, out_gpkg = sw$og)

layer <- c("NHDFlowline")
out_sub <- get_nhdplushr(work_dir, out_gpkg = out_gpkg,
out_sub <- get_nhdplushr(sw$wd, out_gpkg = sw$og,
layers = layer, overwrite = FALSE)

expect_equal(names(out_sub), layer)
layers <- sf::st_layers(out_gpkg)
expect_equal(length(layers$name), 7)
layers <- sf::st_layers(sw$og)
expect_equal(length(layers$name), 2)

fl <- sf::read_sf(out_gpkg, layer)
fl <- sf::read_sf(sw$og, layer)

out_sub <- get_nhdplushr(work_dir, out_gpkg = out_gpkg,
out_sub <- get_nhdplushr(sw$wd, out_gpkg = sw$og,
layers = layer, min_size_sqkm = 10,
overwrite = TRUE)

layers <- sf::st_layers(out_gpkg)
expect_equal(length(layers$name), 7)
layers <- sf::st_layers(sw$og)
expect_equal(length(layers$name), 2)

fl2 <- sf::read_sf(out_gpkg, layer)
fl2 <- sf::read_sf(sw$og, layer)

expect_true(nrow(fl2) < nrow(fl))

devnull <- file.copy(file.path(work_dir, "03_sub.gpkg"),
file.path(work_dir, "04_sub.gpkg"))
devnull <- file.copy(file.path(sw$wd, "03_sub.gpkg"),
file.path(sw$wd, "04_sub.gpkg"))

fl <- read_sf(file.path(work_dir, "04_sub.gpkg"), "NHDFlowline")
fl <- read_sf(file.path(sw$wd, "04_sub.gpkg"), "NHDFlowline")
fl$NHDPlusID <- fl$NHDPlusID + max(fl$NHDPlusID)
write_sf(fl, file.path(work_dir, "04_sub.gpkg"), "NHDFlowline")
write_sf(fl, file.path(sw$wd, "04_sub.gpkg"), "NHDFlowline")

out_sub <- get_nhdplushr(work_dir, pattern = ".*sub.gpkg$")
out_sub <- get_nhdplushr(sw$wd, pattern = ".*sub.gpkg$")

expect_equal(nrow(out_sub$NHDFlowline), 2*nrow(fl))

unlink(file.path(work_dir, "04_sub.gpkg"))
unlink(file.path(sw$wd, "04_sub.gpkg"))

teardown_workdir(sw$wd)
})

test_that("get_nhdplushr simp and proj", {
skip_on_cran()

get_test_file(work_dir)
sw <- setup_workdir()

out <- get_nhdplushr(work_dir)
get_test_file(sw$wd)

out_sub <- get_nhdplushr(work_dir, proj = 5070)
out <- get_nhdplushr(sw$wd)

out_sub <- get_nhdplushr(sw$wd, proj = 5070)

expect_equal(st_crs(out_sub$NHDFlowline),
st_crs(5070))

expect_equal(st_crs(out_sub$NHDPlusCatchment),
st_crs(5070))

out_sub2 <- get_nhdplushr(work_dir, proj = 5070, simp = 20)
out_sub2 <- get_nhdplushr(sw$wd, proj = 5070, simp = 20)
expect_true(length(st_geometry(out_sub$NHDFlowline)[[1]]) >
length(st_geometry(out_sub2$NHDFlowline)[[1]]))

expect_true(nrow(st_geometry(out_sub$NHDPlusCatchment)[[1]][[1]][[1]]) >
nrow(st_geometry(out_sub2$NHDPlusCatchment)[[1]][[1]]))

teardown_workdir(sw$wd)
})

test_that("get_nhdplushr rename and keep_cols", {
skip_on_cran()

get_test_file(work_dir)
sw <- setup_workdir()

get_test_file(sw$wd)

out <- get_nhdplushr(work_dir, out_gpkg = out_gpkg)
out <- get_nhdplushr(sw$wd, out_gpkg = sw$og)

out_sub <- get_nhdplushr(work_dir,
out_sub <- get_nhdplushr(sw$wd,
keep_cols = c("COMID", "FEATUREID", "StreamOrde", "AreaSqKM"),
check_terminals = FALSE)

expect_equal(names(out_sub$NHDFlowline), c("COMID", "StreamOrde", "AreaSqKM", "geom"))
expect_equal(names(out_sub$NHDPlusCatchment), c("FEATUREID", "AreaSqKM", "geom"))

out_sub <- get_nhdplushr(work_dir, rename = FALSE, check_terminals = FALSE)
out_sub <- get_nhdplushr(sw$wd, rename = FALSE, check_terminals = FALSE)
expect_true("NHDPlusID" %in% names(out_sub$NHDFlowline))

teardown_workdir(sw$wd)
})

test_that("make_standalone", {
skip_on_cran()

get_test_file(work_dir)
sw <- setup_workdir()

get_test_file(sw$wd)

fl <- get_nhdplushr(work_dir, check_terminals = FALSE)$NHDFlowline
fl <- get_nhdplushr(sw$wd, check_terminals = FALSE)$NHDFlowline

sa <- make_standalone(fl)

sa_check <- get_nhdplushr(work_dir, check_terminals = TRUE)$NHDFlowline
sa_check <- get_nhdplushr(sw$wd, check_terminals = TRUE)$NHDFlowline

expect_true(all(sa$LevelPathI == sa_check$LevelPathI))
expect_true(!all(fl$LevelPathI == sa_check$LevelPathI))
Expand All @@ -177,6 +229,6 @@ test_that("make_standalone", {
sample_flines <- make_standalone(sample_flines)

expect_true(0 %in% sample_flines$toCOMID)
})

unlink(work_dir, recursive = TRUE)
teardown_workdir(sw$wd)
})

0 comments on commit ccdd3ef

Please sign in to comment.