Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP Set up import from Zenodo, GitHub hash; remove .downloadZ #240

Merged
merged 24 commits into from
Apr 17, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
8ec8443
Set up import from Zenodo, GitHub hash; remove .downloadZ
jwokaty Mar 29, 2024
54949ee
Add function for downloading specific hash from GH and devel, and fro…
sdgamboa Apr 1, 2024
ba67696
add a few more tests for imrpotBugphyzz
sdgamboa Apr 1, 2024
47973b5
update test with checkNAs
sdgamboa Apr 1, 2024
cff6102
combine testing for devel and hash in a single if statement
sdgamboa Apr 1, 2024
114eabd
remove default version in unexported functions used by importBugphyzz
sdgamboa Apr 1, 2024
1451df3
update attributes talbe according to the tests
sdgamboa Apr 1, 2024
57b2b2f
update curation tests
sdgamboa Apr 1, 2024
2c69d8f
update test physiologies
sdgamboa Apr 1, 2024
79b8ed4
update README with TODOs, for other branches
sdgamboa Apr 1, 2024
9c4325a
fix quotation in attributes.tsv
sdgamboa Apr 1, 2024
25c5fce
remove unnecessary LICENSE file
sdgamboa Apr 1, 2024
e08266a
Validation data was hosted on github. The data was downloaded to extd…
sdgamboa Apr 1, 2024
30f8a92
Remove line of reference to GitHub when importing validation data
sdgamboa Apr 1, 2024
43872f6
Add check-bioc
jwokaty Apr 2, 2024
606cedf
add description for the files in extdata
sdgamboa Apr 2, 2024
4477e42
update hash
sdgamboa Apr 2, 2024
d7d785b
Remove separate pkgdown workflow
jwokaty Apr 2, 2024
4040b28
Update hash in test
sdgamboa Apr 5, 2024
338836a
fix indents and length of lines
sdgamboa Apr 8, 2024
701073c
update hash of github resource
sdgamboa Apr 9, 2024
2f3439d
update PICRUst2 reference for NSTI definition
sdgamboa Apr 9, 2024
8e189a9
update tests
sdgamboa Apr 10, 2024
6c63337
update importBugphyzz and tests with Zenodo DOI
sdgamboa Apr 16, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 41 additions & 29 deletions R/bugphyzz.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@ utils::globalVariables(c(
#' tidy data.frames. To learn more about the structure of the data.frames
#' please check the bugphyzz vignette with `browseVignettes("bugphyzz")`.
#'
#' @param version Character string indicating the version.
#' Options: devel or a zenodo record.
#' @param version Character string indicating the version. Default is the
#' latest release on Zenodo. Options: Zenodo DOI, GitHub commit hash, or devel.
#' @param force_download Logical value. Force a fresh download of the data or
#' use the one stored in the cache (if available). Default is FALSE.
#' @param v Validation value. Default 0.5 (see details).
Expand Down Expand Up @@ -54,12 +54,14 @@ utils::globalVariables(c(
#' names(bp)
#'
importBugphyzz <- function(
version = 'devel', force_download = FALSE, v = 0.5, exclude_rarely = TRUE
version = "d3fd894", force_download = FALSE, v = 0.5, exclude_rarely = TRUE

) {
if (version == 'devel') {
output <- .downloadDevel(force_download)
}

## output is a list of three data.frames
## one of each: binary, multistate, numeric
output <- .downloadResource(version, force_download)

## TODO add release version
output <- lapply(output, function(x) split(x, x$Attribute))
output <- purrr::list_flatten(output)
Expand Down Expand Up @@ -297,30 +299,21 @@ getTaxonSignatures <- function(tax, bp, ...) {
)
}

## Import the devel version of bugphyzz
.downloadDevel <- function(force_download) {
types <- c("multistate", "binary", "numeric")
urls <- paste0(
"https://github.com/waldronlab/bugphyzzExports/raw/main/bugphyzz_",
types,
".csv"
)
names(urls) <- types
output <- vector("list", length(urls))
for (i in seq_along(output)) {
message("Importing ", names(urls)[i], " data...")
names(output)[i] <- names(urls)[i]
rpath <- .getResource(
rname = paste0("bugphyzz_", names(urls)[i], ".tsv"),
url = urls[i], verbose = TRUE, force = force_download
)
output[[i]] <- utils::read.csv(rpath, header = TRUE, skip = 1) |>
dplyr::mutate(Attribute = tolower(Attribute))
## Import a version of bugphyzz.
##
## Dispatches on the format of `version`:
##   * Zenodo DOI ("10.5281/zenodo.<digits>"): the numeric record id is
##     extracted and passed to .downloadZ().
##   * "devel", or a 7-character alphanumeric string (short GitHub commit
##     hash): passed unchanged to .downloadGH().
##
## version: a single, non-NA value (default "d3fd894").
## force_download: forwarded unchanged to the selected download helper.
##
## Returns whatever the selected helper returns. Any other `version`
## (including NA or length != 1) stops with the "Version must be ..." error.
.downloadResource <- function(version = "d3fd894", force_download) {
    invalid_msg <-
        "Version must be a Zenodo DOI, GitHub commit hash, or 'devel'."

    ## Validate first so the `if` conditions below never receive NA or a
    ## vector, which would fail with an unrelated base-R error.
    if (length(version) != 1L || is.na(version)) {
        stop(invalid_msg)
    }

    ## Dots are escaped, and the same prefix pattern is used for detection
    ## and for stripping, so anything detected as a DOI is always reduced to
    ## its numeric record id.
    zenodo_prefix <- "^10\\.5281/zenodo\\."
    is_zenodo <- stringr::str_detect(
        version, paste0(zenodo_prefix, "[0-9]+$")
    )

    if (is_zenodo) {
        record <- sub(zenodo_prefix, "", version)
        output <- .downloadZ(record, force_download)
    } else if (
        version == "devel" ||
            stringr::str_detect(version, stringr::regex("^[:alnum:]{7}$"))
    ) {
        ## "devel" and short commit hashes are both fetched from GitHub.
        output <- .downloadGH(version, force_download)
    } else {
        stop(invalid_msg)
    }
    return(output)
}

## TODO update this function when release is ready
.downloadZ <- function(record, force_download) {
base_url <- paste0("https://zenodo.org/api/records/", record)
req <- httr2::request(base_url)
Expand All @@ -340,9 +333,28 @@ getTaxonSignatures <- function(tax, bp, ...) {

output <- vector("list", length(files))
for (i in seq_along(output)) {
output[[i]] <- utils::read.csv(files[i], header = TRUE)
# output[[i]] <- utils::read.csv(files, header = TRUE, skip = 1)
# dplyr::mutate(Attribute = tolower(Attribute))
output[[i]] <- utils::read.csv(files, header = TRUE, skip = 1) |>
dplyr::mutate(Attribute = tolower(Attribute))
}
return(output)
}

## Download the three bugphyzz export tables (binary, multistate, numeric)
## from the waldronlab/bugphyzzExports GitHub repository.
##
## version: string interpolated into the raw-file URL right after "raw/"
##   (default "devel").
## force_download: forwarded to .getResource() as `force`.
##
## Returns a list named binary / multistate / numeric. Each element is the
## CSV read with its first line skipped and `Attribute` lower-cased.
.downloadGH <- function(version = "devel", force_download) {
    types <- c("binary", "multistate", "numeric")
    urls <- paste0("https://github.com/waldronlab/bugphyzzExports/raw/",
        version, "/bugphyzz_", types, ".csv"
    )
    names(urls) <- types

    fetch_one <- function(k) {
        type <- names(urls)[k]
        message("Importing ", type, " data...")
        ## NOTE(review): the resource name does not include `version`;
        ## confirm against .getResource() whether a cached copy of another
        ## version could be returned when force_download is FALSE.
        local_path <- .getResource(
            rname = paste0("bugphyzz_", type, ".csv"),
            url = urls[k], verbose = TRUE, force = force_download
        )
        tbl <- utils::read.csv(local_path, header = TRUE, skip = 1)
        dplyr::mutate(tbl, Attribute = tolower(Attribute))
    }

    output <- lapply(seq_along(urls), fetch_one)
    names(output) <- names(urls)
    return(output)
}
6 changes: 3 additions & 3 deletions man/importBugphyzz.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

28 changes: 20 additions & 8 deletions tests/testthat/test-importBugphyzz.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
# Setup -------------------------------------------------------------------

library(purrr)
bp <- importBugphyzz()
expected_columns_multistate <- c(
NCBI_ID = "integer", Taxon_name = "character",
Rank = "character",
Expand Down Expand Up @@ -62,17 +61,30 @@ checkColumnTypes <- function(x) {
return(all(lgl_vct))
}

# Tests -------------------------------------------------------------------
## Placeholder helper: currently hands its argument back unchanged.
checkNAs <- function(x) identity(x)

test_that("importBugphyzz works", {
chr_vct <- map_chr(bp, class)
expect_true(all("data.frame" == chr_vct))
})
# tests -------------------------------------------------------------------

test_that("All variable names are correct", {
## Import with version = "devel": every element must be a non-empty
## data.frame with the expected column names and column types.
test_that("importBugphyzz works with devel", {
    bp <- importBugphyzz(version = "devel", force_download = TRUE)
    element_classes <- map_chr(bp, class)
    expect_true(all(element_classes == "data.frame"))
    has_rows <- map_int(bp, nrow) > 0
    expect_true(all(has_rows))
    names_ok <- map_lgl(bp, checkColumnNames)
    expect_true(all(names_ok))
    types_ok <- map_lgl(bp, checkColumnTypes)
    expect_true(all(types_ok))
})

test_that("All variable types are correct", {
## Import with a short commit hash: same structural checks as for any other
## version (data.frame elements, at least one row, expected columns/types).
test_that("importBugphyzz works with hash", {
    bp <- importBugphyzz(version = "d3fd894", force_download = TRUE)
    expect_true(all(map_chr(bp, class) == "data.frame"))
    expect_true(all(map_int(bp, nrow) > 0))
    for (check in list(checkColumnNames, checkColumnTypes)) {
        expect_true(all(map_lgl(bp, check)))
    }
})

## TODO create test for using Zenodo

## A version that is neither a Zenodo DOI, a 7-character hash, nor "devel"
## must be rejected. The expected message is pinned so that an unrelated
## failure (e.g. a network error) cannot make this test pass.
test_that("importBugphyzz doesn't work with other words", {
    expect_error(
        importBugphyzz(version = "abcd-1234", force_download = TRUE),
        regexp = "Version must be a Zenodo DOI, GitHub commit hash, or 'devel'",
        fixed = TRUE
    )
})
Loading