Skip to content

Commit

Permalink
Update function names, tests, documentation
Browse files Browse the repository at this point in the history
  • Loading branch information
stitam committed Nov 27, 2023
1 parent 56d9c3b commit aeaff36
Show file tree
Hide file tree
Showing 11 changed files with 100 additions and 123 deletions.
9 changes: 1 addition & 8 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -3,27 +3,20 @@
export(ena_query)
export(extract_accn)
export(flag_files)
export(get_meta)
export(get_uid)
export(link_uid)
export(mgnify_endpoints)
export(mgnify_instance)
export(mgnify_list)
export(ncbi_download_genome)
export(ncbi_parse)
export(ncbi_parse_assembly_xml)
export(ncbi_parse_biosample_txt)
export(parse_gb_header)
export(parse_report)
importFrom(XML,xmlParse)
importFrom(XML,xmlRoot)
importFrom(XML,xmlToList)
importFrom(curl,curl_download)
importFrom(curl,handle_setopt)
importFrom(curl,new_handle)
importFrom(dplyr,bind_cols)
importFrom(dplyr,bind_rows)
importFrom(httr,RETRY)
importFrom(httr,content)
importFrom(rentrez,entrez_fetch)
importFrom(stringr,str_locate)
importFrom(tibble,as_tibble)
29 changes: 0 additions & 29 deletions R/get_meta.R

This file was deleted.

14 changes: 8 additions & 6 deletions R/ncbi_meta.R → R/ncbi_parse.R
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
#' Parse NCBI sequence metadata
#'
#' This function can be used to parse various non-sequence data sets from NCBI
#' into a tibble. The function currently supports parsing NCBI BioSample data
#' from XML format.
#' into a tibble. These data sets usually accompany the biological sequences and
#' contain additional information e.g. identifiers, information about the
#' sample, the sequencing platform, etc. The function currently supports parsing
#' NCBI BioSample data from XML format.
#' @param meta character; either a character vector containing a data set that
#' was retrieved through \code{rentrez::entrez_fetch()} or a path to a file
#' that was downloaded from NCBI.
Expand All @@ -27,24 +29,24 @@
#' retmode = "xml"
#' )
#' # Parse XML
#' ncbi_meta(meta = meta_xml, db = "biosample", format = "xml")
#' ncbi_parse(meta = meta_xml, db = "biosample", format = "xml")
#'
#' # NCBI BioSample, download XML file from NCBI and parse
#'
#' # Manually download the XML file
#' # https://www.ncbi.nlm.nih.gov/biosample/?term=SAMN02714232
#' # upper right corner -> send to -> file -> format = full (xml) -> create file
#' # Parse XML
#' ncbi_meta(meta = "biosample_result.xml", db = "biosample", format = "xml")
#' ncbi_parse(meta = "biosample_result.xml", db = "biosample", format = "xml")
#' }
#' @export
ncbi_meta <- function(
ncbi_parse <- function(
meta,
db,
format = "xml",
verbose = getOption("verbose")
) {
f <- get(paste("ncbi_meta", db, format, sep = "_"))
f <- get(paste("ncbi_parse", db, format, sep = "_"))
if (db == "biosample" & format == "xml") {
out <- f(meta, verbose)
} else {
Expand Down
6 changes: 3 additions & 3 deletions R/ncbi_meta_biosample.R → R/ncbi_parse_biosample_xml.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,13 @@
#' that was retrieved through \code{rentrez::entrez_fetch()} or a path to an xml
#' file that was downloaded from NCBI BioSample.
#' @param verbose logical; Should verbose messages be printed to console?
ncbi_meta_biosample_xml <- function(
ncbi_parse_biosample_xml <- function(
biosample_xml,
verbose = getOption("verbose")
) {
parsed_xml <- xml2::as_list(xml2::read_xml(biosample_xml))[[1]]
names(parsed_xml) <- sapply(parsed_xml, function(x) attributes(x)$accession)
out <- lapply(parsed_xml, ncbi_meta_biosample_xml_entry)
out <- lapply(parsed_xml, ncbi_parse_biosample_xml_entry)
out <- dplyr::bind_rows(out)
out <- out[, c(
"biosample_uid",
Expand All @@ -23,7 +23,7 @@ ncbi_meta_biosample_xml <- function(
return(out)
}

ncbi_meta_biosample_xml_entry <- function(x, verbose = getOption("verbose")) {
ncbi_parse_biosample_xml_entry <- function(x, verbose = getOption("verbose")) {
# attributes(x)$names contains all fields! use for validation
main_attrs <- attributes(x)
expected_names <- c(
Expand Down
35 changes: 0 additions & 35 deletions man/get_meta.Rd

This file was deleted.

28 changes: 0 additions & 28 deletions man/ncbi_meta_biosample.Rd

This file was deleted.

55 changes: 55 additions & 0 deletions man/ncbi_parse.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

19 changes: 19 additions & 0 deletions man/ncbi_parse_biosample_xml.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion tests/testthat/test-get_uid.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,4 @@ test_that("get_uid works with a complex term", {

expect_s3_class(res, c("tbl_df", "tbl", "data.frame"))
expect_true(nrow(res) > 3000)
})
})
13 changes: 0 additions & 13 deletions tests/testthat/test-ncbi_meta.R

This file was deleted.

13 changes: 13 additions & 0 deletions tests/testthat/test-ncbi_parse.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Integration test: fetches BioSample records from NCBI Entrez and checks that
# ncbi_parse() turns the returned XML into a tibble with the expected
# BioSample accessions.
test_that("ncbi_parse() works with BioSamples", {
  # The test talks to the live NCBI service, so skip it wherever network
  # access is unavailable or not permitted instead of failing.
  skip_on_cran()
  skip_if_offline()

  data(examples)
  # Resolve the example BioSample accessions to Entrez UIDs.
  biosample_uid <- get_uid(examples$biosample, db = "biosample")
  # Download the full BioSample records as a single XML document.
  meta_xml <- rentrez::entrez_fetch(
    db = "biosample",
    id = biosample_uid$uid,
    rettype = "full",
    retmode = "xml"
  )
  res <- ncbi_parse(meta = meta_xml, db = "biosample", format = "xml")
  expect_s3_class(res, c("tbl_df", "tbl", "data.frame"))
  expect_equal(
    res$biosample,
    c("SAMN02714232", "SAMD00057211", "SAMN32745369")
  )
})

0 comments on commit aeaff36

Please sign in to comment.