From d4ae70229f6de936024f616602bc0607f17de843 Mon Sep 17 00:00:00 2001
From: sdgamboa <samuel.gamboa.tuz@gmail.com>
Date: Fri, 22 Mar 2024 10:19:20 -0400
Subject: [PATCH] update documentation for all exported functions

---
 DESCRIPTION               |  2 +-
 R/bugphyzz.R              | 76 +++++++++++++++++++++++++++------------
 R/physiologies.R          | 16 ++++-----
 man/getTaxonSignatures.Rd | 13 ++++---
 man/importBugphyzz.Rd     | 43 ++++++++++++++++++----
 man/makeSignatures.Rd     | 25 +++++++------
 man/physiologies.Rd       | 11 +++---
 man/showPhys.Rd           |  5 +--
 8 files changed, 128 insertions(+), 63 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index ee56f5d9..a9d6cc96 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: bugphyzz
 Title: A harmonized data resource and software for enrichment analysis of microbial physiologies
-Version: 0.0.1.9
+Version: 0.0.1.10
 Authors@R: 
     c(
         person(
diff --git a/R/bugphyzz.R b/R/bugphyzz.R
index da473842..7723b34b 100644
--- a/R/bugphyzz.R
+++ b/R/bugphyzz.R
@@ -1,17 +1,43 @@
 #' Import bugphyzz
 #'
 #' \code{importBugphyzz} imports bugphyzz annotations as a list of
-#' data.frames (1 per physiology/Attribute)
+#' tidy data.frames. To learn more about the structure of the data.frames
+#' please check the bugphyzz vignette with `browseVignettes("bugphyzz")`.
 #'
-#' @param version Character string. Default is 'devel'
-#' (current file on the GitHub repo waldronlab/bugphyzzExports).
+#' @param version Character string indicating the version.
+#' Options: devel, doi, GitHub hash.
 #' @param force_download Logical value. Force a fresh download of the data or
 #' use the one stored in the cache (if available). Default is FALSE.
-#' @param v Validation value. Default 0.5.
+#' @param v Validation value. Default 0.5 (see details).
 #' @param exclude_rarely Default is TRUE. Exclude values with
-#' Frequency == FALSE.
+#' Frequency == "rarely" (see details).
 #'
-#' @return A list of data frames.
+#' @details
+#'
+#' ## Data structure
+#' The data structure of the data.frames imported with `importBugphyzz` is
+#' detailed in the main vignette. Please run `browseVignettes("bugphyzz")`.
+#'
+#' ## Validation (`v` argument)
+#' Data imported with `importBugphyzz` includes annotations imputed through
+#' ancestral state reconstruction (ASR) methods. A 10-fold cross-validation
+#' approach was implemented to assess the reliability of the data imputed.
+#' Matthews correlation coefficient (MCC) and R-squared (R2) were used for the
+#' validation of discrete and numeric attributes, respectively.
+#' Details can be found at: https://github.com/waldronlab/taxPProValidation.
+#' By default, imputed annotations with an MCC or R2 value greater than 0.5 are
+#' imported. The minimum value can be adjusted with the `v` argument (only
+#' values between 0 and 1).
+#'
+#' ## Frequency (`exclude_rarely` argument)
+#' One of the variables in the bugphyzz data.frames is "Frequency", which
+#' can adopt values of
+#' "always", "usually", "sometimes", "rarely", or "never". By default
+#' "never" and "rarely" are excluded. "rarely" could be included with
+#' `exclude_rarely = FALSE`. To learn more about these frequency keywords
+#' please check the bugphyzz vignette with `browseVignettes("bugphyzz")`.
+#'
+#' @return A list of tidy data frames.
 #' @export
 #'
 #' @examples
@@ -97,31 +123,34 @@ importBugphyzz <- function(
 
 #' Make signatures
 #'
-#' \code{makeSignatures} Creates signatures for a list of bugphyzz
-#' data.frames imported with \code{importBugphyzz}
+#' \code{makeSignatures} creates a list of bug signatures from
+#' a tidy data.frame imported through the `importBugphyzz` function. Please
+#' run `browseVignettes("bugphyzz")` for detailed examples.
 #'
 #' @param dat A data.frame.
 #' @param tax_id_type A character string. Valid options: NCBI_ID, Taxon_name.
 #' @param tax_level A character vector. Taxonomic rank. Valid options:
-#' kingdom, phylum, class, order, family, genus, species, strain.
+#' superkingdom, kingdom, phylum, class, order, family, genus, species, strain.
 #' They can be combined. "mixed" is equivalent to select all valid ranks.
 #' @param evidence A character vector. Valid options: exp, igc, nas, tas, tax, asr.
 #' They can be combined. Default is all.
 #' @param frequency A character vector. Valid options: always, usually,
-#' sometimes, rarely, unknown. They can be combiend. Default value is all but
-#' rarely.
-#' @param min_size Minimun number of bugs in a signature. Default is 10.
-#' @param min Minimum value inclusive. Only for numeric attributes. Default is NULL.
-#' @param max Maximum value inclusive. Only for numeric attributes. Default is NULL.
+#' sometimes, rarely, unknown. They can be combined. By default, "rarely" is
+#' excluded.
+#' @param min_size Minimum number of bugs in a signature. Default is 10.
+#' @param min Minimum value (inclusive). Only for numeric attributes.
+#' Default is NULL.
+#' @param max Maximum value (inclusive). Only for numeric attributes.
+#' Default is NULL.
 #'
-#' @return A list of character vector with the IDs of the bugs.
+#' @return A list of character vectors with scientific names or taxids.
 #' @export
 #'
 #' @examples
 #'
 #' bp <- importBugphyzz()
-#' sigs <- lapply(bp, makeSignatures)
-#' sigs <- purrr::list_flatten(sigs)
+#' sigs <- purrr::map(bp, makeSignatures)
+#' sigs <- purrr::list_flatten(sigs, name_spec = "{inner}")
 #'
 makeSignatures <- function(
     dat, tax_id_type = "NCBI_ID",
@@ -167,11 +196,13 @@ makeSignatures <- function(
 
 #' Get Taxon Signatures
 #'
-#' \code{getTaxonSignatures} get the names of all of the signatures for a taxon.
+#' \code{getTaxonSignatures} returns the names of all of the signatures associated
+#' with a particular taxon. More details can be found in the main
+#' bugphyzz vignette; please run `browseVignettes("bugphyzz")`.
 #'
 #' @param tax A valid NCBI ID or taxon name. If taxon name is used, the
-#' tax_id_type = "Taxon_name" must also be used.
-#' @param bp Import from \code{importBugphyzz}.
+#' argument tax_id_type = "Taxon_name" must also be used.
+#' @param bp List of data.frames imported with \code{importBugphyzz}.
 #' @param ... Arguments passed to \code{makeSignatures}.
 #'
 #' @return A character vector with the names of the signatures for a taxon.
@@ -179,9 +210,10 @@ makeSignatures <- function(
 #'
 #' @examples
 #' taxid <- "562"
+#' taxonName <- "Escherichia coli"
 #' bp <- importBugphyzz()
-#' sig_names_1 <- getTaxonSignatures("562", bp)
-#' sig_names_2 <- getTaxonSignatures("Escherichia coli", bp, tax_id_type = "Taxon_name")
+#' sig_names_1 <- getTaxonSignatures(taxid, bp)
+#' sig_names_2 <- getTaxonSignatures(taxonName, bp, tax_id_type = "Taxon_name")
 #'
 getTaxonSignatures <- function(tax, bp, ...) {
   sigs <- purrr::map(bp, makeSignatures, ...)
diff --git a/R/physiologies.R b/R/physiologies.R
index 4c90d793..7084bb58 100644
--- a/R/physiologies.R
+++ b/R/physiologies.R
@@ -1,12 +1,9 @@
 
-#' Import phsiologies
+#' Import physiologies (for devs)
 #'
-#' \code{physiologies} imports data from the
-#' Google spreadsheets at https://drive.google.com/drive/folders/1i2UAolVWAYa7UnETNnCs0BDWjKPp3ev5.
-#' This function (and its internal functions) do minimal changes to the
-#' imported data. These changes are only meant to match data coming from
-#' different sources, and attaching information needed for further processing,
-#' such as source and attribute type.
+#' \code{physiologies} imports a list of data.frames. This data is in "raw"
+#' state before cleaning and going through the data imputation steps. It
+#' should be used by developers/curators of the package.
 #'
 #' @param keyword Character vector with one or more valid keywords.
 #' Valid keyboards can be checked with \code{showPhys}. If 'all', all
@@ -89,10 +86,11 @@ physiologies <- function(keyword = 'all', full_source = FALSE) {
   return(physiologies)
 }
 
-#' Show list of available physiologies
+#' Show list of available physiologies (for devs)
 #'
 #' \code{showPhys} prints the names of the available physiologies that can be
-#' imported with the \code{\link{physiologies}} function.
+#' imported with the \code{\link{physiologies}} function. This function
+#' should be used by developers/curators.
 #'
 #' @param which_names A character string. Options: 'all' (default),
 #' 'spreadsheets', 'bacdive'.
diff --git a/man/getTaxonSignatures.Rd b/man/getTaxonSignatures.Rd
index 89bc22aa..7957c6a9 100644
--- a/man/getTaxonSignatures.Rd
+++ b/man/getTaxonSignatures.Rd
@@ -8,9 +8,9 @@ getTaxonSignatures(tax, bp, ...)
 }
 \arguments{
 \item{tax}{A valid NCBI ID or taxon name. If taxon name is used, the
-tax_id_type = "Taxon_name" must also be used.}
+argument tax_id_type = "Taxon_name" must also be used.}
 
-\item{bp}{Import from \code{importBugphyzz}.}
+\item{bp}{List of data.frames imported with \code{importBugphyzz}.}
 
 \item{...}{Arguments passed to \code{makeSignatures}.}
 }
@@ -18,12 +18,15 @@ tax_id_type = "Taxon_name" must also be used.}
 A character vector with the names of the signatures for a taxon.
 }
 \description{
-\code{getTaxonSignatures} get the names of all of the signatures for a taxon.
+\code{getTaxonSignatures} returns the names of all of the signatures associated
+with a particular taxon. More details can be found in the main
+bugphyzz vignette; please run \code{browseVignettes("bugphyzz")}.
 }
 \examples{
 taxid <- "562"
+taxonName <- "Escherichia coli"
 bp <- importBugphyzz()
-sig_names_1 <- getTaxonSignatures("562", bp)
-sig_names_2 <- getTaxonSignatures("Escherichia coli", bp, tax_id_type = "Taxon_name")
+sig_names_1 <- getTaxonSignatures(taxid, bp)
+sig_names_2 <- getTaxonSignatures(taxonName, bp, tax_id_type = "Taxon_name")
 
 }
diff --git a/man/importBugphyzz.Rd b/man/importBugphyzz.Rd
index 4a07b3ac..d6bb95c0 100644
--- a/man/importBugphyzz.Rd
+++ b/man/importBugphyzz.Rd
@@ -12,23 +12,54 @@ importBugphyzz(
 )
 }
 \arguments{
-\item{version}{Character string. Default is 'devel'
-(current file on the GitHub repo waldronlab/bugphyzzExports).}
+\item{version}{Character string indicating the version.
+Options: devel, doi, GitHub hash.}
 
 \item{force_download}{Logical value. Force a fresh download of the data or
 use the one stored in the cache (if available). Default is FALSE.}
 
-\item{v}{Validation value. Default 0.5.}
+\item{v}{Validation value. Default 0.5 (see details).}
 
 \item{exclude_rarely}{Default is TRUE. Exclude values with
-Frequency == FALSE.}
+Frequency == "rarely" (see details).}
 }
 \value{
-A list of data frames.
+A list of tidy data frames.
 }
 \description{
 \code{importBugphyzz} imports bugphyzz annotations as a list of
-data.frames (1 per physiology/Attribute)
+tidy data.frames. To learn more about the structure of the data.frames
+please check the bugphyzz vignette with \code{browseVignettes("bugphyzz")}.
+}
+\details{
+\subsection{Data structure}{
+
+The data structure of the data.frames imported with \code{importBugphyzz} is
+detailed in the main vignette. Please run \code{browseVignettes("bugphyzz")}.
+}
+
+\subsection{Validation (\code{v} argument)}{
+
+Data imported with \code{importBugphyzz} includes annotations imputed through
+ancestral state reconstruction (ASR) methods. A 10-fold cross-validation
+approach was implemented to assess the reliability of the data imputed.
+Matthews correlation coefficient (MCC) and R-squared (R2) were used for the
+validation of discrete and numeric attributes, respectively.
+Details can be found at: https://github.com/waldronlab/taxPProValidation.
+By default, imputed annotations with an MCC or R2 value greater than 0.5 are
+imported. The minimum value can be adjusted with the \code{v} argument (only
+values between 0 and 1).
+}
+
+\subsection{Frequency (\code{exclude_rarely} argument)}{
+
+One of the variables in the bugphyzz data.frames is "Frequency", which
+can adopt values of
+"always", "usually", "sometimes", "rarely", or "never". By default
+"never" and "rarely" are excluded. "rarely" could be included with
+\code{exclude_rarely = FALSE}. To learn more about these frequency keywords
+please check the bugphyzz vignette with \code{browseVignettes("bugphyzz")}.
+}
 }
 \examples{
 
diff --git a/man/makeSignatures.Rd b/man/makeSignatures.Rd
index 9a6f0c1d..b6ae2c14 100644
--- a/man/makeSignatures.Rd
+++ b/man/makeSignatures.Rd
@@ -21,33 +21,36 @@ makeSignatures(
 \item{tax_id_type}{A character string. Valid options: NCBI_ID, Taxon_name.}
 
 \item{tax_level}{A character vector. Taxonomic rank. Valid options:
-kingdom, phylum, class, order, family, genus, species, strain.
+superkingdom, kingdom, phylum, class, order, family, genus, species, strain.
 They can be combined. "mixed" is equivalent to select all valid ranks.}
 
 \item{evidence}{A character vector. Valid options: exp, igc, nas, tas, tax, asr.
 They can be combined. Default is all.}
 
 \item{frequency}{A character vector. Valid options: always, usually,
-sometimes, rarely, unknown. They can be combiend. Default value is all but
-rarely.}
+sometimes, rarely, unknown. They can be combined. By default, "rarely" is
+excluded.}
 
-\item{min_size}{Minimun number of bugs in a signature. Default is 10.}
+\item{min_size}{Minimum number of bugs in a signature. Default is 10.}
 
-\item{min}{Minimum value inclusive. Only for numeric attributes. Default is NULL.}
+\item{min}{Minimum value (inclusive). Only for numeric attributes.
+Default is NULL.}
 
-\item{max}{Maximum value inclusive. Only for numeric attributes. Default is NULL.}
+\item{max}{Maximum value (inclusive). Only for numeric attributes.
+Default is NULL.}
 }
 \value{
-A list of character vector with the IDs of the bugs.
+A list of character vectors with scientific names or taxids.
 }
 \description{
-\code{makeSignatures} Creates signatures for a list of bugphyzz
-data.frames imported with \code{importBugphyzz}
+\code{makeSignatures} creates a list of bug signatures from
+a tidy data.frame imported through the \code{importBugphyzz} function. Please
+run \code{browseVignettes("bugphyzz")} for detailed examples.
 }
 \examples{
 
 bp <- importBugphyzz()
-sigs <- lapply(bp, makeSignatures)
-sigs <- purrr::list_flatten(sigs)
+sigs <- purrr::map(bp, makeSignatures)
+sigs <- purrr::list_flatten(sigs, name_spec = "{inner}")
 
 }
diff --git a/man/physiologies.Rd b/man/physiologies.Rd
index 96e7068a..bbe8230b 100644
--- a/man/physiologies.Rd
+++ b/man/physiologies.Rd
@@ -2,7 +2,7 @@
 % Please edit documentation in R/physiologies.R
 \name{physiologies}
 \alias{physiologies}
-\title{Import phsiologies}
+\title{Import physiologies (for devs)}
 \usage{
 physiologies(keyword = "all", full_source = FALSE)
 }
@@ -19,12 +19,9 @@ contain shortened versions of the sources. Default is \code{FALSE}.}
 A list of data.frames in tidy format.
 }
 \description{
-\code{physiologies} imports data from the
-Google spreadsheets at https://drive.google.com/drive/folders/1i2UAolVWAYa7UnETNnCs0BDWjKPp3ev5.
-This function (and its internal functions) do minimal changes to the
-imported data. These changes are only meant to match data coming from
-different sources, and attaching information needed for further processing,
-such as source and attribute type.
+\code{physiologies} imports a list of data.frames. This data is in "raw"
+state before cleaning and going through the data imputation steps. It
+should be used by developers/curators of the package.
 }
 \examples{
 l <- physiologies('all')
diff --git a/man/showPhys.Rd b/man/showPhys.Rd
index 64750c81..4e40c1f9 100644
--- a/man/showPhys.Rd
+++ b/man/showPhys.Rd
@@ -2,7 +2,7 @@
 % Please edit documentation in R/physiologies.R
 \name{showPhys}
 \alias{showPhys}
-\title{Show list of available physiologies}
+\title{Show list of available physiologies (for devs)}
 \usage{
 showPhys(which_names = "all")
 }
@@ -15,7 +15,8 @@ A character vector with the names of the physiologies.
 }
 \description{
 \code{showPhys} prints the names of the available physiologies that can be
-imported with the \code{\link{physiologies}} function.
+imported with the \code{\link{physiologies}} function. This function
+should be used by developers/curators.
 }
 \examples{
 showPhys()