Skip to content

Commit

Permalink
Merge pull request #124 from inbo/download_multi
Browse files Browse the repository at this point in the history
download_zenodo(): use curl::multi_download() for parallel = TRUE; add unit tests
  • Loading branch information
hansvancalster authored Nov 21, 2023
2 parents 20a9eeb + a8781bf commit 1bfb740
Show file tree
Hide file tree
Showing 9 changed files with 71 additions and 62 deletions.
2 changes: 1 addition & 1 deletion .zenodo.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"title": "inborutils: Collection of Useful R Utilities",
"version": "0.3.1",
"version": "0.4.0",
"license": "MIT",
"upload_type": "software",
"description": "<p>While working on research projects, typical small functionalities are\nuseful across these projects. Instead of copy-pasting these functions in\nall individual project repositories/folders, this package collects these\nfunctions for reuse by ourself and - if useful - others as well.<\/p>",
Expand Down
2 changes: 1 addition & 1 deletion CITATION.cff
Original file line number Diff line number Diff line change
Expand Up @@ -34,4 +34,4 @@ abstract: While working on research projects, typical small functionalities are
identifiers:
- type: url
value: https://inbo.github.io/inborutils/
version: 0.3.1
version: 0.4.0
6 changes: 3 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: inborutils
Title: Collection of Useful R Utilities
Version: 0.3.1
Version: 0.4.0
Authors@R: c(
person("Hans", "Van Calster", , "[email protected]", role = c("aut", "cre"),
comment = c(ORCID = "0000-0001-8595-8426", affiliation = "Research Institute for Nature and Forest (INBO)")),
Expand Down Expand Up @@ -50,7 +50,6 @@ Imports:
jsonlite,
leaflet,
lubridate,
parallel,
purrr,
readr,
rgbif,
Expand All @@ -73,7 +72,8 @@ Suggests:
odbc,
rmarkdown,
sp,
testthat
testthat,
withr
VignetteBuilder:
knitr
Config/checklist/communities: inbo
Expand Down
3 changes: 0 additions & 3 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -70,9 +70,6 @@ importFrom(lubridate,is.POSIXt)
importFrom(lubridate,parse_date_time)
importFrom(lubridate,tz)
importFrom(lubridate,ymd_h)
importFrom(parallel,clusterMap)
importFrom(parallel,makeCluster)
importFrom(parallel,stopCluster)
importFrom(purrr,map_chr)
importFrom(purrr,map_lgl)
importFrom(purrr,pmap_dfr)
Expand Down
5 changes: 5 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# inborutils 0.4.0

* Improved the parallel download method in `download_zenodo()` and made it the
default

# inborutils 0.3.1

* Fixed a bug in `download_zenodo()`
Expand Down
55 changes: 9 additions & 46 deletions R/download_zenodo.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,9 @@
#' @param doi a doi pointer to the Zenodo archive starting with
#' '10.5281/zenodo.'.
#' See examples.
#' @param parallel Logical (\code{FALSE} by default).
#' If \code{TRUE}, will run a number of parallel processes, each downloading
#' another file.
#' This is useful when multiple large files are present in the Zenodo
#' record, which otherwise would be downloaded sequentially.
#' @param parallel Logical.
#' If \code{TRUE} (the default), files will be
#' downloaded concurrently for multi-file records.
#' Note that download speed remains bounded by bandwidth and traffic limits.
#' @param quiet Logical (\code{FALSE} by default).
#' Do you want to suppress informative messages (not warnings)?
Expand All @@ -36,10 +34,6 @@
#' is.string
#' is.flag
#' noNA
#' @importFrom parallel
#' makeCluster
#' clusterMap
#' stopCluster
#'
#' @export
#' @family download_functions
Expand All @@ -58,7 +52,7 @@
#' }
download_zenodo <- function(doi,
path = ".",
parallel = FALSE,
parallel = TRUE,
quiet = FALSE) {
assert_that(is.string(doi), is.string(path))
assert_that(is.flag(parallel), noNA(parallel), is.flag(quiet), noNA(quiet))
Expand Down Expand Up @@ -107,43 +101,12 @@ download_zenodo <- function(doi,
)
}

if (parallel) {
nr_nodes <- min(10, length(file_urls))

if (!quiet) {
message(
"Initializing parallel download on ",
nr_nodes,
" R session nodes...\n"
)
}

clus <- makeCluster(nr_nodes)

if (!quiet) {
message(
"Starting parallel downloads. ",
"This may take a while (and I can't show you the overall progress).\n",
"Be patient...\n"
)
}

clusterMap(
clus,
function(src, dest) {
curl_download(
url = src,
destfile = dest,
quiet = quiet
)
},
file_urls,
destfiles
if (length(file_urls) > 1 && parallel) {
curl::multi_download(
urls = file_urls,
destfiles = destfiles,
progress = !quiet
)

stopCluster(clus)

if (!quiet) message("Ended parallel downloads.")
} else {
mapply(curl_download,
file_urls,
Expand Down
4 changes: 2 additions & 2 deletions inst/CITATION
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,12 @@ citHeader("To cite `inborutils` in publications please use:")
# begin checklist entry
bibentry(
bibtype = "Manual",
title = "inborutils: Collection of Useful R Utilities. Version 0.3.1",
title = "inborutils: Collection of Useful R Utilities. Version 0.4.0",
author = c( author = c(person(given = "Hans", family = "Van Calster"), person(given = "Damiano", family = "Oldoni"), person(given = "Stijn", family = "Van Hoey"))),
year = 2023,
url = "https://inbo.github.io/inborutils/",
abstract = "While working on research projects, typical small functionalities are useful across these projects. Instead of copy-pasting these functions in all individual project repositories/folders, this package collects these functions for reuse by ourself and - if useful - others as well.",
textVersion = "Van Calster, Hans; Oldoni, Damiano; Van Hoey, Stijn (2023) inborutils: Collection of Useful R Utilities. Version 0.3.1. https://inbo.github.io/inborutils/",
textVersion = "Van Calster, Hans; Oldoni, Damiano; Van Hoey, Stijn (2023) inborutils: Collection of Useful R Utilities. Version 0.4.0. https://inbo.github.io/inborutils/",
keywords = "helper functions; utilities; coding club",
)
# end checklist entry
10 changes: 4 additions & 6 deletions man/download_zenodo.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

46 changes: 46 additions & 0 deletions tests/testthat/test-zenodo.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
test_that("download_zenodo() works for a single-file record", {
  # This test performs a real download; skip (instead of failing) when no
  # connection is available. Assumes the record is served from zenodo.org.
  skip_if_offline("zenodo.org")
  # Temporary target directory: created here, removed when the test exits.
  zenodo_dir <- withr::local_tempdir()
  expect_no_error(
    suppressMessages(
      download_zenodo(doi = "10.5281/zenodo.3784149", path = zenodo_dir)
    )
  )
})

test_that("download_zenodo() works for a GitHub code record", {
  # This test performs a real download; skip (instead of failing) when no
  # connection is available. Assumes the record is served from zenodo.org.
  skip_if_offline("zenodo.org")
  # Temporary target directory: created here, removed when the test exits.
  zenodo_dir <- withr::local_tempdir()
  expect_no_error(
    suppressMessages(
      download_zenodo(doi = "10.5281/zenodo.7335805", path = zenodo_dir)
    )
  )
})

test_that("download_zenodo() works for a multi-file record", {
  # This test performs a real download; skip (instead of failing) when no
  # connection is available. Assumes the record is served from zenodo.org.
  skip_if_offline("zenodo.org")
  # Temporary target directory: created here, removed when the test exits.
  zenodo_dir <- withr::local_tempdir()
  # `parallel` is left at its default here.
  expect_no_error(
    suppressMessages(
      download_zenodo(
        doi = "10.5281/zenodo.4420858",
        path = zenodo_dir
      )
    )
  )
})

test_that("download_zenodo() can work sequentially for a multi-file record", {
  # This test performs a real download; skip (instead of failing) when no
  # connection is available. Assumes the record is served from zenodo.org.
  skip_if_offline("zenodo.org")
  # Temporary target directory: created here, removed when the test exits.
  zenodo_dir <- withr::local_tempdir()
  # Same record as the multi-file test, but with parallel downloading
  # explicitly switched off.
  expect_no_error(
    suppressMessages(
      download_zenodo(
        doi = "10.5281/zenodo.4420858",
        path = zenodo_dir,
        parallel = FALSE
      )
    )
  )
})

0 comments on commit 1bfb740

Please sign in to comment.