Skip to content

Commit

Permalink
Add generic function for parsing ncbi data
Browse files Browse the repository at this point in the history
  • Loading branch information
stitam committed Nov 27, 2023
1 parent b23dd59 commit 56d9c3b
Show file tree
Hide file tree
Showing 2 changed files with 54 additions and 26 deletions.
54 changes: 54 additions & 0 deletions R/ncbi_meta.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
#' Parse NCBI sequence metadata
#'
#' This function can be used to parse various non-sequence data sets from NCBI
#' into a tibble. The function currently supports parsing NCBI BioSample data
#' from XML format.
#' @param meta character; either a character vector containing a data set that
#' was retrieved through \code{rentrez::entrez_fetch()} or a path to a file
#' that was downloaded from NCBI.
#' @param db character; the NCBI database from which the data was retrieved.
#' Currently only \code{"biosample"} is supported.
#' @param format character; the format of the data set. Currently only
#' \code{"xml"} is supported.
#' @param verbose logical; Should verbose messages be printed to console?
#' @return The result of the parser that matches \code{db} and \code{format}
#' (looked up by name as \code{ncbi_meta_<db>_<format>}). For unsupported
#' combinations an empty tibble is returned.
#' @examples
#' \dontrun{
#' data(examples)
#'
#' # NCBI BioSample, fully programmatic access
#'
#' # Get internal BioSample UID for BioSample ID
#' biosample_uid <- get_uid(examples$biosample, db = "biosample")
#' # Get metadata in XML format
#' meta_xml <- rentrez::entrez_fetch(
#'   db = "biosample",
#'   id = biosample_uid$uid,
#'   rettype = "full",
#'   retmode = "xml"
#' )
#' # Parse XML
#' ncbi_meta(meta = meta_xml, db = "biosample", format = "xml")
#'
#' # NCBI BioSample, download XML file from NCBI and parse
#'
#' # Manually download the XML file
#' # https://www.ncbi.nlm.nih.gov/biosample/?term=SAMN02714232
#' # upper right corner -> send to -> file -> format = full (xml) -> create file
#' # Parse XML
#' ncbi_meta(meta = "biosample_result.xml", db = "biosample", format = "xml")
#' }
#' @export
ncbi_meta <- function(
    meta,
    db,
    format = "xml",
    verbose = getOption("verbose")
) {
  # identical() keeps the check scalar and safe for NULL, NA or longer vectors,
  # so the condition can never be of length != 1.
  supported <- identical(db, "biosample") && identical(format, "xml")
  if (!supported) {
    # Validate before looking up the parser: get() on a non-existent parser
    # name would raise "object not found" and this fallback would never run.
    if (isTRUE(verbose)) {
      message("Unsupported combination of 'db' and 'format'; no data parsed.")
    }
    return(tibble::tibble())
  }
  # Resolve the parser by naming convention; mode = "function" makes sure a
  # non-function object with the same name is not picked up by accident.
  parser <- get(
    paste("ncbi_meta", db, format, sep = "_"),
    mode = "function"
  )
  parser(meta, verbose)
}
26 changes: 0 additions & 26 deletions R/ncbi_meta_biosample.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,32 +6,6 @@
#' that was retrieved through \code{rentrez::entrez_fetch()} or a path to an xml
#' file that was downloaded from NCBI BioSample.
#' @param verbose logical; Should verbose messages be printed to console?
#' @examples
#' \dontrun{
#' data(examples)
#'
#' # Option 1 - fully programmatic access
#'
#' # Get internal BioSample UID for BioSample ID
#' biosample_uid <- get_uid(examples$biosample, db = "biosample")
#' # Get metadata in XML format
#' meta_xml <- rentrez::entrez_fetch(
#' db = "biosample",
#' id = biosample_uid$uid,
#' rettype = "full",
#' retmode = "xml"
#' )
#' # Parse XML
#' ncbi_meta_biosample_xml(meta_xml)
#'
#' # Option 2 - download XML file from NCBI and parse
#'
#' # Manually download the XML file
#' # https://www.ncbi.nlm.nih.gov/biosample/?term=SAMN02714232
#' # upper right corner -> send to -> file -> format = full (xml) -> create file
#' # Parse XML
#' ncbi_meta_biosample_xml("biosample_result.xml")
#' }
ncbi_meta_biosample_xml <- function(
biosample_xml,
verbose = getOption("verbose")
Expand Down

0 comments on commit 56d9c3b

Please sign in to comment.