Merge pull request #22 from federicomarini/mosdefied-pcaExplorer

Mosdefied pcaExplorer
federicomarini · Sep 20, 2024 · 6744736 · 6744736
2 parents bb12fb3 + 61f0fb3
commit 6744736
Show file tree

Hide file tree

Showing 37 changed files with 552 additions and 514 deletions.
diff --git a/.Rbuildignore b/.Rbuildignore
@@ -11,3 +11,4 @@ docs/*
 ^\.github$
 ^CODE_OF_CONDUCT\.md$
 ^LICENSE\.md$
+^TODO\.md$
diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml
@@ -68,12 +68,11 @@ jobs:
         if: runner.os == 'Linux'
         env:
           RHUB_PLATFORM: linux-x86_64-ubuntu-gcc
-        run: |
-          Rscript -e "remotes::install_github('r-hub/sysreqs')"
-          sysreqs=$(Rscript -e "cat(sysreqs::sysreq_commands('DESCRIPTION'))")
-          sudo -s eval "$sysreqs"
-          sudo apt-get update && sudo apt-get -y install libcurl4-openssl-dev libglpk-dev libharfbuzz-dev libfribidi-dev
-
+        uses: r-lib/actions/setup-r-dependencies@v2
+        with:
+          extra-packages: any::rcmdcheck
+          pak-version: devel
+
       - name: Install system dependencies (macOS)
         if: runner.os == 'macOS'
         run: |

diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,8 +1,8 @@
 Package: pcaExplorer
 Type: Package
 Title: Interactive Visualization of RNA-seq Data Using a Principal Components Approach
-Version: 2.31.0
-Date: 2024-04-07
+Version: 2.99.0
+Date: 2024-09-13
 Authors@R: c(person("Federico", "Marini", role = c("aut", "cre"), 
     email ="[email protected]", 
     comment = c(ORCID = '0000-0003-3252-7758')))
@@ -11,10 +11,10 @@ Description: This package provides functionality for interactive visualization
     allow for quick information extraction and effective data exploration. A Shiny
     application encapsulates the whole analysis.
 License: MIT + file LICENSE
-LazyData: TRUE
 Imports: 
     DESeq2, 
     SummarizedExperiment, 
+    mosdef (>= 1.1.0),
     GenomicRanges, 
     IRanges,
     S4Vectors, 
@@ -58,6 +58,7 @@ BugReports: https://github.com/federicomarini/pcaExplorer/issues
 biocViews: ImmunoOncology, Visualization, RNASeq, DimensionReduction,
     PrincipalComponent, QualityControl, GUI, ReportWriting, ShinyApps
 VignetteBuilder: knitr
-RoxygenNote: 7.3.1
+RoxygenNote: 7.3.2
 Encoding: UTF-8
 NeedsCompilation: no
+Roxygen: list(markdown = TRUE)
diff --git a/NAMESPACE b/NAMESPACE
@@ -49,6 +49,8 @@ importFrom(grDevices,rgb)
 importFrom(heatmaply,heatmaply)
 importFrom(limma,goana)
 importFrom(limma,topGO)
+importFrom(mosdef,gene_plot)
+importFrom(mosdef,run_topGO)
 importFrom(pheatmap,pheatmap)
 importFrom(plotly,plotlyOutput)
 importFrom(plotly,renderPlotly)

diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,12 @@
+# pcaExplorer 2.99.0
+
+## Other notes
+
+* The transition to the functions available in the `mosdef` Bioconductor package is complete, with the original functions now being deprecated. This applies to `topGOtable()` (now replaced by `mosdef::run_topGO()`).
+* The gene plot widgets now also use the `gene_plot()` function from `mosdef`, instead of the previous undocumented internal function
+* The Roxygen-based documentation now supports markdown. No visible changes should appear to the user, as the content should have stayed essentially the same
+* Although no visible changes for the end user are expected, the incoming major version bump will reflect the change in the dependency graph, ensuring that this is noticed at least at the version numbering level
+
 # pcaExplorer 2.22.0
 
 ## Other notes

diff --git a/R/correlatePCs.R b/R/correlatePCs.R
@@ -2,15 +2,15 @@
 #'
 #' Computes the significance of (cor)relations between PCA scores and the sample
 #' experimental covariates, using Kruskal-Wallis test for categorial variables
-#' and the \code{cor.test} based on Spearman's correlation for continuous
+#' and the `cor.test` based on Spearman's correlation for continuous
 #' variables
 #'
-#' @param pcaobj A \code{prcomp} object
-#' @param coldata A \code{data.frame} object containing the experimental
+#' @param pcaobj A `prcomp` object
+#' @param coldata A `data.frame` object containing the experimental
 #' covariates
 #' @param pcs A numeric vector, containing the corresponding PC number
 #'
-#' @return A \code{data.frame} object with computed p values for each covariate
+#' @return A `data.frame` object with computed p values for each covariate
 #' and for each principal component
 #'
 #' @examples
@@ -23,7 +23,7 @@
 #' @export
 correlatePCs <- function(pcaobj, coldata, pcs = 1:4) {
   # split the analysis for continuous and categorial
-  coldataTypes <- sapply(coldata, class)
+  coldataTypes <- vapply(coldata, class, character(1))
   # extract the scores from the pc object
   x <- pcaobj$x
 
@@ -53,10 +53,10 @@ correlatePCs <- function(pcaobj, coldata, pcs = 1:4) {
 #'
 #' Plots the significance of the (cor)relation of each covariate vs a principal component
 #'
-#' @param pccorrs A \code{data.frame} object generated by \link{correlatePCs}
+#' @param pccorrs A `data.frame` object generated by [correlatePCs]
 #' @param pc An integer number, corresponding to the principal component of
 #' interest
-#' @param logp Logical, defaults to \code{TRUE}, displays the -\code{log10} of
+#' @param logp Logical, defaults to `TRUE`, displays the -`log10` of
 #' the pvalue instead of the p value itself
 #'
 #' @return A base plot object

diff --git a/R/deprecated.R b/R/deprecated.R
@@ -0,0 +1,34 @@
+#' Deprecated functions in pcaExplorer
+#'
+#' Functions that are on their way to the function afterlife.
+#' Their successors are also listed.
+#' 
+#' The successors of these functions are likely coming after the rework that
+#' led to the creation of the `mosdef` package. See its documentation
+#' for more details.
+#' 
+#' @param ... Ignored arguments.
+#' 
+#' @return All functions throw a warning, with a deprecation message pointing
+#' towards their successor (if available).
+#' 
+#' @name deprecated
+#' 
+#' @section Transitioning to the mosdef framework:
+#' 
+#' - [topGOtable()] is now being replaced by the more flexible 
+#' [mosdef::run_topGO()] function
+#' 
+#' @author Federico Marini
+#' 
+#' @examples
+#' # try(topGOtable())
+#' 
+NULL
+
+
+## #' @export
+## #' @rdname defunct
+## trendVar <- function(...) {
+##   .Defunct("fitTrendVar")
+## }
diff --git a/R/distro_expr.R b/R/distro_expr.R
@@ -1,8 +1,8 @@
 #' Plot distribution of expression values
 #'
-#' @param rld A \code{\link{DESeqTransform}} object.
-#' @param plot_type Character, choose one of \code{boxplot}, \code{violin} or
-#' \code{density}. Defaults to \code{density}
+#' @param rld A [DESeqTransform()] object.
+#' @param plot_type Character, choose one of `boxplot`, `violin` or
+#' `density`. Defaults to `density`
 #'
 #' @return A plot with the distribution of the expression values
 #' @export

diff --git a/R/genespca.R b/R/genespca.R
@@ -3,14 +3,14 @@
 #' Computes and plots the principal components of the genes, eventually displaying
 #' the samples as in a typical biplot visualization.
 #'
-#' The implementation of this function is based on the beautiful \code{ggbiplot}
+#' The implementation of this function is based on the beautiful `ggbiplot`
 #' package developed by Vince Vu, available at https://github.com/vqv/ggbiplot.
 #' The adaptation and additional parameters are tailored to display typical genomics data
 #' such as the transformed counts of RNA-seq experiments
 #'
-#' @param x A \code{\link{DESeqTransform}} object, with data in \code{assay(x)},
-#' produced for example by either \code{\link{rlog}} or
-#' \code{\link{varianceStabilizingTransformation}}
+#' @param x A [DESeqTransform()] object, with data in `assay(x)`,
+#' produced for example by either [rlog()] or
+#' [varianceStabilizingTransformation()]
 #' @param ntop Number of top genes to use for principal components,
 #' selected by highest row variance
 #' @param choices Vector of two numeric values, to select on which principal components to plot
@@ -37,7 +37,7 @@
 #' @param circle.prob Size of the correlation circle in Normal probability
 #' @param varname.size Size of the text for variable names
 #' @param varname.adjust  Adjustment factor the placement of the variable names,
-#'  >= 1 means farther from the arrow
+#' '>= 1' means farther from the arrow
 #' @param varname.abbrev  Logical, whether or not to abbreviate the variable names
 #' @param returnData Logical, if TRUE returns a data.frame for further use, containing the
 #' selected principal components for custom plotting
@@ -47,10 +47,10 @@
 #' to allow for distinguishing where the variables are plotted
 #' @param useRownamesAsLabels Logical, if TRUE uses the row names as labels for plotting
 #' @param point_size Size of the points to be plotted for the observations (genes)
-#' @param annotation A \code{data.frame} object, with row.names as gene identifiers (e.g. ENSEMBL ids)
-#' and a column, \code{gene_name}, containing e.g. HGNC-based gene symbols
+#' @param annotation A `data.frame` object, with row.names as gene identifiers (e.g. ENSEMBL ids)
+#' and a column, `gene_name`, containing e.g. HGNC-based gene symbols
 #'
-#' @return An object created by \code{ggplot}, which can be assigned and further customized.
+#' @return An object created by `ggplot`, which can be assigned and further customized.
 #'
 #' @examples
 #'

diff --git a/R/get_annotation.R b/R/get_annotation.R
@@ -1,17 +1,17 @@
 #' Get an annotation data frame from biomaRt
 #'
-#' @param dds A \code{\link{DESeqDataSet}} object
+#' @param dds A [DESeqDataSet()] object
 #' @param biomart_dataset A biomaRt dataset to use. To see the list, type
-#' \code{mart = useMart('ensembl')}, followed by \code{listDatasets(mart)}.
+#' `mart = useMart('ensembl')`, followed by `listDatasets(mart)`.
 #' @param idtype Character, the ID type of the genes as in the row names of
-#' \code{dds}, to be used for the call to \code{\link{getBM}}
+#' `dds`, to be used for the call to [getBM()]
 #'
-#' @return A data frame for ready use in \code{pcaExplorer}, retrieved from biomaRt.
+#' @return A data frame for ready use in `pcaExplorer`, retrieved from biomaRt.
 #' @export
 #'
 #' @examples
-#' library(airway)
-#' data(airway)
+#' library("airway")
+#' data("airway", package = "airway")
 #' airway
 #' dds_airway <- DESeq2::DESeqDataSetFromMatrix(assay(airway),
 #'                                              colData = colData(airway),
@@ -44,24 +44,24 @@ To obtain a list, type mart = useMart('ensembl'), followed by listDatasets(mart)
 
 #' Get an annotation data frame from org db packages
 #'
-#' @param dds A \code{\link{DESeqDataSet}} object
-#' @param orgdb_species Character string, named as the \code{org.XX.eg.db}
+#' @param dds A [DESeqDataSet()] object
+#' @param orgdb_species Character string, named as the `org.XX.eg.db`
 #' package which should be available in Bioconductor
 #' @param idtype Character, the ID type of the genes as in the row names of
-#' \code{dds}, to be used for the call to \code{\link{mapIds}}
+#' `dds`, to be used for the call to [mapIds()]
 #' @param key_for_genenames Character, corresponding to the column name for the 
 #' key in the orgDb package containing the official gene name (often called 
 #' gene symbol). 
 #' This parameter defaults to "SYMBOL", but can be adjusted in case the key is not
-#' found in the annotation package (e.g. for \code{org.Sc.sgd.db}).
+#' found in the annotation package (e.g. for `org.Sc.sgd.db`).
 #'
-#' @return A data frame for ready use in \code{pcaExplorer}, retrieved from the
+#' @return A data frame for ready use in `pcaExplorer`, retrieved from the
 #' org db packages
 #' @export
 #'
 #' @examples
-#' library(airway)
-#' data(airway)
+#' library("airway")
+#' data("airway", package = "airway")
 #' airway
 #' dds_airway <- DESeq2::DESeqDataSetFromMatrix(assay(airway),
 #'                                              colData = colData(airway),

diff --git a/R/hi_loadings.R b/R/hi_loadings.R
@@ -1,16 +1,16 @@
 #' Extract genes with highest loadings
 #'
-#' @param pcaobj A \code{prcomp} object
+#' @param pcaobj A `prcomp` object
 #' @param whichpc An integer number, corresponding to the principal component of
 #' interest
 #' @param topN Integer, number of genes with top and bottom loadings
-#' @param exprTable A \code{matrix} object, e.g. the counts of a \code{\link{DESeqDataSet}}.
+#' @param exprTable A `matrix` object, e.g. the counts of a [DESeqDataSet()].
 #' If not NULL, returns the counts matrix for the selected genes
-#' @param annotation A \code{data.frame} object, with row.names as gene identifiers (e.g. ENSEMBL ids)
-#' and a column, \code{gene_name}, containing e.g. HGNC-based gene symbols
+#' @param annotation A `data.frame` object, with row.names as gene identifiers (e.g. ENSEMBL ids)
+#' and a column, `gene_name`, containing e.g. HGNC-based gene symbols
 #' @param title The title of the plot
 #'
-#' @return A ggplot2 object, or a \code{matrix}, if \code{exprTable} is not null
+#' @return A ggplot2 object, or a `matrix`, if `exprTable` is not null
 #'
 #' @examples
 #' dds <- makeExampleDESeqDataSet_multifac(betaSD = 3, betaSD_tissue = 1)

diff --git a/R/makeds.R b/R/makeds.R
@@ -1,11 +1,11 @@
 #' Make a simulated DESeqDataSet for two or more experimental factors
 #'
 #' Constructs a simulated dataset of Negative Binomial data from different conditions.
-#' The fold changes between the conditions can be adjusted with the \code{betaSD_condition}
-#' and the \code{betaSD_tissue} arguments.
+#' The fold changes between the conditions can be adjusted with the `betaSD_condition`
+#' and the `betaSD_tissue` arguments.
 #'
 #' This function is designed and inspired following the proposal of
-#' \code{\link{makeExampleDESeqDataSet}} from the \code{DESeq2} package. Credits are given
+#' [makeExampleDESeqDataSet()] from the `DESeq2` package. Credits are given
 #' to Mike Love for the nice initial implementation
 #'
 #' @param n number of rows (genes)
@@ -15,10 +15,10 @@
 #' @param interceptMean the mean of the intercept betas (log2 scale)
 #' @param interceptSD the standard deviation of the intercept betas (log2 scale)
 #' @param dispMeanRel a function specifying the relationship of the dispersions on
-#' \code{2^trueIntercept}
+#' `2^trueIntercept`
 #' @param sizeFactors multiplicative factors for each sample
 #'
-#' @return a \code{\link{DESeqDataSet}} with true dispersion,
+#' @return a [DESeqDataSet()] with true dispersion,
 #' intercept for two factors (condition and tissue) and beta values in the
 #'  metadata columns.  Note that the true betas are provided on the log2 scale.
 #'

diff --git a/R/pair_corr.R b/R/pair_corr.R
@@ -3,17 +3,17 @@
 #' @param df A data frame, containing the (raw/normalized/transformed) counts
 #' @param log Logical, whether to convert the input values to log2 (with addition
 #' of a pseudocount). Defaults to FALSE.
-#' @param method Character string, one of \code{pearson} (default), \code{kendall}, or
-#' \code{spearman} as in \code{cor}
+#' @param method Character string, one of `pearson` (default), `kendall`, or
+#' `spearman` as in `cor`
 #' @param use_subset Logical value. If TRUE, only 1000 values per sample will be used
 #' to speed up the plotting operations.
 #'
 #' @return A plot with pairwise scatter plots and correlation coefficients
 #' @export
 #'
 #' @examples
-#' library(airway)
-#' data(airway)
+#' library("airway")
+#' data("airway", package = "airway")
 #' airway
 #' dds_airway <- DESeq2::DESeqDataSetFromMatrix(assay(airway),
 #'                                              colData = colData(airway),