Skip to content

Commit

Permalink
Add ncbi_get_meta() and update deps
Browse files Browse the repository at this point in the history
  • Loading branch information
stitam committed Nov 30, 2023
1 parent 6140413 commit 4411df2
Show file tree
Hide file tree
Showing 8 changed files with 105 additions and 20 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ export(mgnify_endpoints)
export(mgnify_instance)
export(mgnify_list)
export(ncbi_download_genome)
export(ncbi_get_meta)
export(ncbi_parse)
export(ncbi_parse_assembly_xml)
export(ncbi_parse_biosample_txt)
Expand Down
10 changes: 5 additions & 5 deletions R/get_uid.R
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ get_uid <- function(term,
foo <- function(x) {
if (is.na(x)) {
if (verbose) webseq_message("na")
return(tibble::tibble(term = x, db = db, uid = NA))
return(tibble::tibble(term = x, db = db, uid = NA_integer_))
}
if (verbose) webseq_message("query", x, appendLF = FALSE)
r <- NULL
Expand All @@ -39,7 +39,7 @@ get_uid <- function(term,
}
if (inherits(hit, "try-error")) {
if (verbose) webseq_message("service_down")
return(tibble::tibble(term = x, db = db, uid = NA))
return(tibble::tibble(term = x, db = db, uid = NA_integer_))
}
if (hit$count > hit$retmax) {
r <- NULL
Expand All @@ -54,14 +54,14 @@ get_uid <- function(term,
}
if (inherits(hit, "try-error")) {
if (verbose) webseq_message("service_down")
return(tibble::tibble(term = x, db = db, uid = NA))
return(tibble::tibble(term = x, db = db, uid = NA_integer_))
}
if (length(hit$ids) > 0) {
if (verbose) message("OK.")
return(tibble::tibble(term = x, db = db, uid = hit$ids))
return(tibble::tibble(term = x, db = db, uid = as.integer(hit$ids)))
} else {
if (verbose) message("Not found. Returning NA.")
return(tibble::tibble(term = x, db = db, uid = NA))
return(tibble::tibble(term = x, db = db, uid = NA_integer_))
}
}
if (is.null(cache_file)) {
Expand Down
53 changes: 53 additions & 0 deletions R/ncbi_get_meta.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
#' Get sequence metadata from NCBI
#'
#' This function is a wrapper for \code{rentrez::entrez_fetch()} that retrieves
#' metadata from a given NCBI sequence database. The function currently works
#' with the following databases: \code{"assembly"}, \code{"biosample"}.
#' @param id integer; an integer vector of database specific NCBI UIDs.
#' @param db character; the database to search in. For options see
#' \code{rentrez::entrez_dbs()}.
#' @param verbose logical; Should verbose messages be printed to console?
#' @examples
#' \dontrun{
#' data(examples)
#' uids <- get_uid(examples$biosample, db = "biosample")
#' meta <- ncbi_get_meta(uids$uid, db = "biosample")
#' }
#' @export
ncbi_get_meta <- function(
id,
db,
batch_size = 250,
verbose = getOption("verbose")
) {
if (!"integer" %in% class(id)) {
stop("id must be an integer vector.")
}
idlist <- list()
if (length(id) > batch_size) {
nbatch <- ceiling(length(id)/batch_size)
for (i in 1:nbatch) {
idlist[[i]] <- id[((i-1)*batch_size + 1):min(i*batch_size, length(id))]
}
} else {
idlist[[1]] <- id
}
if (db == "assembly") {
rettype <- "docsum"
retmode <- "xml"
}
if (db == "biosample") {
rettype <- "full"
retmode <- "xml"
}
out <- lapply(idlist, function(x) {
rentrez::entrez_fetch(
db = db,
id = x,
rettype = rettype,
retmode = retmode
)
})
class(out) <- c(paste("ncbi", db, "meta", sep = "_"), class(out))
return(out)
}
7 changes: 1 addition & 6 deletions R/ncbi_parse.R
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,7 @@
#' # Get internal BioSample UID for BioSample ID
#' biosample_uid <- get_uid(examples$biosample, db = "biosample")
#' # Get metadata in XML format
#' meta_xml <- rentrez::entrez_fetch(
#' db = "biosample",
#' id = biosample_uid$uid,
#' rettype = "full",
#' retmode = "xml"
#' )
#' meta <- ncbi_get_meta(uids$uid, db = "biosample")
#' # Parse XML
#' ncbi_parse(meta = meta_xml, db = "biosample", format = "xml")
#'
Expand Down
5 changes: 4 additions & 1 deletion R/ncbi_parse_biosample_xml.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,10 @@ ncbi_parse_biosample_xml <- function(
biosample_xml,
verbose = getOption("verbose")
) {
parsed_xml <- xml2::as_list(xml2::read_xml(biosample_xml))[[1]]
parsed_xml <- lapply(biosample_xml, function(x) {
xml2::as_list(xml2::read_xml(x))[[1]]
})
parsed_xml <- unlist(parsed_xml, recursive = FALSE)
names(parsed_xml) <- sapply(parsed_xml, function(x) attributes(x)$accession)
out <- lapply(parsed_xml, ncbi_parse_biosample_xml_entry)
out <- dplyr::bind_rows(out)
Expand Down
28 changes: 28 additions & 0 deletions man/ncbi_get_meta.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

17 changes: 10 additions & 7 deletions man/ncbi_parse.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 3 additions & 1 deletion man/ncbi_parse_assembly_xml.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 4411df2

Please sign in to comment.