Skip to content

Commit

Permalink
Parse assembly xmls with ncbi_parse()
Browse files Browse the repository at this point in the history
  • Loading branch information
stitam committed Nov 27, 2023
1 parent aeaff36 commit 6140413
Show file tree
Hide file tree
Showing 4 changed files with 20 additions and 3 deletions.
16 changes: 14 additions & 2 deletions R/ncbi_parse.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,14 @@
#' @examples
#' \dontrun{
#' data(examples)
#'
#' # NCBI Assembly, download XML file from NCBI and parse
#'
#' # Manually download the XML file
#' # https://www.ncbi.nlm.nih.gov/assembly/GCF_000299415.1
#' # upper right corner -> send to -> file -> format = xml -> create file
#' # Parse XML
#' ncbi_parse(meta = "assembly_summary.xml", db = "assembly", format = "xml")
#'
#' # NCBI BioSample, fully programmatic access
#'
Expand All @@ -37,7 +45,7 @@
#' # https://www.ncbi.nlm.nih.gov/biosample/?term=SAMN02714232
#' # upper right corner -> send to -> file -> format = full (xml) -> create file
#' # Parse XML
#' ncbi_parse(meta = "biosample_result.xml", db = "biosample", format = "xml")
#' ncbi_parse(meta = "biosample_result.xml", db = "biosample", format = "xml")
#' }
#' @export
ncbi_parse <- function(
Expand All @@ -46,8 +54,12 @@ ncbi_parse <- function(
format = "xml",
verbose = getOption("verbose")
) {
db <- match.arg(db, choices = c("assembly", "biosample"))
format <- match.arg(format, choices = c("xml"))
f <- get(paste("ncbi_parse", db, format, sep = "_"))
if (db == "biosample" & format == "xml") {
if (db == "assembly" && format == "xml") {
out <- f(meta, verbose)
} else if (db == "biosample" && format == "xml") {
out <- f(meta, verbose)
} else {
out <- tibble::tibble()
Expand Down
3 changes: 2 additions & 1 deletion R/ncbi_parse_assembly_xml.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#' This function can be used to parse an xml file from the NCBI assembly
#' database into a tibble.
#' @param file character; path to an xml file.
#' @param verbose logical; Should verbose messages be printed to console?
#' @returns a tibble.
#' @examples
#' \dontrun{
Expand All @@ -13,7 +14,7 @@
#' ncbi_parse_assembly_xml("assembly_summary.xml")
#' }
#' @export
ncbi_parse_assembly_xml <- function(file) {
ncbi_parse_assembly_xml <- function(file, verbose = getOption("verbose")) {
foo <- function(x) {
rootnode <- XML::xmlRoot(x)
assembly <- XML::xpathSApply(
Expand Down
Binary file modified data/examples.rda
Binary file not shown.
4 changes: 4 additions & 0 deletions scripts/prep_examples.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
examples <- list(
assembly = c(
"GCF_000002435.2",
"GCF_000299415.1"
),
biosample = c(
"SAMN02714232", # contact email
"SAMD00057211", # different ids, duplicated attributes
Expand Down

0 comments on commit 6140413

Please sign in to comment.