Skip to content

Commit

Permalink
Merge pull request #7 from maize-genetics/gvcf-metrics
Browse files Browse the repository at this point in the history
Add gVCF plotting support
  • Loading branch information
btmonier authored Jul 9, 2024
2 parents e2cbff7 + 92aaa6b commit 4e6bca2
Show file tree
Hide file tree
Showing 14 changed files with 771 additions and 16 deletions.
5 changes: 4 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: rPHG2
Title: R interface to PHGv2
Version: 0.4
Version: 0.5
Authors@R:
person(
given = "Brandon",
Expand Down Expand Up @@ -32,13 +32,16 @@ Imports:
cli,
curl,
GenomicRanges,
GenomeInfoDb,
ggplot2,
httr,
IRanges,
jsonlite,
methods,
patchwork,
rJava,
rlang,
scales,
tibble,
tools,
utils
Expand Down
6 changes: 6 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ export(numberOfRefRanges)
export(numberOfTaxa)
export(phgType)
export(plotDot)
export(plotGvcf)
export(port)
export(readHapIdMetaData)
export(readHapIdPosMetaData)
Expand All @@ -44,6 +45,7 @@ exportClasses(PHGServerCon)
exportMethods("$")
exportMethods("metricsIds<-")
exportMethods("metricsTable<-")
exportMethods("seqnames<-")
exportMethods(brapiURL)
exportMethods(brapiVersion)
exportMethods(filterRefRanges)
Expand All @@ -61,14 +63,18 @@ exportMethods(numberOfRefRanges)
exportMethods(numberOfTaxa)
exportMethods(phgType)
exportMethods(plotDot)
exportMethods(plotGvcf)
exportMethods(port)
exportMethods(readHapIdMetaData)
exportMethods(readHapIdPosMetaData)
exportMethods(readHapIds)
exportMethods(readPhgDataSet)
exportMethods(readRefRanges)
exportMethods(readSamples)
exportMethods(seqnames)
exportMethods(serverInfo)
importFrom(GenomeInfoDb,"seqnames<-")
importFrom(GenomeInfoDb,seqnames)
importFrom(curl,has_internet)
importFrom(methods,is)
importFrom(utils,.DollarNames)
12 changes: 12 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,15 @@
## rPHG 0.5
* Added new visualization method, `plotGvcf()`:
+ auto plotting various gVCF metrics
+ granular metric support through formula subsetting
* Added new accessor and setting method, `seqnames()`
+ Returns all contig IDs found in a `PHGMetrics` object
+ Setter version (`seqnames()<-`) will update old IDs found within a
`data.frame` object
* Added new default methods to `plotGvcf()` and `plotDot()` for `PHGMetrics`
objects that only contain one alignment or gVCF dataset


## rPHG2 0.4
* Added vignettes and README updates
+ `vignette("rPHG2")`
Expand Down
24 changes: 24 additions & 0 deletions R/class_all_generics.R
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,20 @@ setGeneric("phgType", function(object, ...) standardGeneric("phgType"))
setGeneric("plotDot", function(object, ...) standardGeneric("plotDot"))


## ----
#' @title Generate GVCF plots
#'
#' @description
#' Generates general GVCF metrics plots for genome-wide GVCF statistics
#'
#' @param object an \code{rPHG2} metrics object
#' @param ... Additional arguments, for use in specific methods
#'
#' @rdname plotGvcf
#' @export
setGeneric("plotGvcf", function(object, ...) standardGeneric("plotGvcf"))


## ----
#' @title Return port value
#'
Expand Down Expand Up @@ -361,3 +375,13 @@ setGeneric("readSamples", function(object, ...) standardGeneric("readSamples"))
setGeneric("serverInfo", function(object, ...) standardGeneric("serverInfo"))


## ----
#' @importFrom GenomeInfoDb seqnames
NULL


## ----
#' @importFrom GenomeInfoDb seqnames<-
NULL


208 changes: 196 additions & 12 deletions R/class_phg_metrics.R
Original file line number Diff line number Diff line change
Expand Up @@ -520,7 +520,7 @@ setMethod(
signature = signature(object = "PHGMetrics"),
definition = function(
object,
metricId,
metricId = NULL,
querySeqId = NULL,
refSeqId = NULL,
queryLab = NULL,
Expand All @@ -533,26 +533,210 @@ setMethod(
rlang::abort("This method currently does not support multiple ID plotting")
}

if (length(metricId) == 0) {
rlang::abort("ID is not a valid AnchorWave table")
}
if (is.null(metricId)) {
df <- metricsTable(object = object, type = "align")

if (!metricId %in% metricsIds(object, type = "align")) {
rlang::abort("ID is not a valid AnchorWave table")
# Check if return object is data.frame (singular) or list (many)
# If "list" -> we need to return first data.frame element
if (!is(df, "data.frame")) {
df <- df[[1]]
}
} else {
if (!metricId %in% metricsIds(object, type = "align") || length(metricId) == 0) {
rlang::abort("ID is not a valid AnchorWave table")
}
df <- metricsTable(object, metricId)
}

p <- plotDotFromMetrics(
df = metricsTable(object, metricId),
metricId = metricId,
df = df,
metricId = metricId,
querySeqId = querySeqId,
refSeqId = refSeqId,
queryLab = queryLab,
refLab = refLab,
colorId = colorId
refSeqId = refSeqId,
queryLab = queryLab,
refLab = refLab,
colorId = colorId
)

return(p)
}
)


## ---
#' @param object
#' A \code{PHGMetrics} object containing the gVCF data.
#' @param metricId
#' A character vector specifying the ID of the metric to be plotted. Only a
#' single ID is supported.
#' @param f
#' A formula object defining the plot.
#' @param nRow
#' An integer specifying the number of rows in the plot layout.
#' @param nCol
#' An integer specifying the number of columns in the plot layout.
#' @param tag
#' What tag type do you want passed to final plot?
#'
#' @return A plot object generated from the specified gVCF data and layout.
#'
#' @rdname plotGvcf
#' @export
setMethod(
f = "plotGvcf",
signature = signature(object = "PHGMetrics"),
definition = function(
object,
metricId = NULL,
f = NULL,
nRow = NULL,
nCol = NULL,
tag = "A"
) {
if (length(metricId) > 1) {
rlang::abort("This method currently does not support multiple ID plotting")
}

if (is.null(metricId)) {
df <- metricsTable(object = object, type = "gvcf")

# Check if return object is data.frame (singular) or list (many)
# If "list" -> we need to return first data.frame element
if (!is(df, "data.frame")) {
df <- df[[1]]
}
} else {
if (!metricId %in% metricsIds(object, type = "gvcf") || length(metricId) == 0) {
rlang::abort("ID is not a valid gVCF table")
}
df <- metricsTable(object, metricId)
}

if (is.null(f)) {
f <- CORE ~ ALL
}

p <- plotGvcfFromMetrics(
df = df,
formula = f,
nRow = nRow,
nCol = nCol,
tag = tag
)

return(p)
}
)


## ----
#' @title
#' Return all contig IDs from metrics object
#'
#' @param x
#' A \code{PHGMetrics} object
#'
#' @return A vector of unique contig IDs
#' @importFrom GenomeInfoDb seqnames
#' @export
setMethod(
f = "seqnames",
signature = signature(x = "PHGMetrics"),
definition = function(x) {
tables <- metricsTable(x)

seqIds <- unlist(
lapply(tables, function(df) {
if ("query_chr" %in% names(df) && !is.null(df$query_chr)) {
ids <- df$query_chr
}

if ("chrom" %in% names(df) && !is.null(df$chrom)) {
ids <- df$chrom
ids <- ids[ids != "ALL"]
}

return(ids)
})
)

return(unique(seqIds))
}
)


## ----
#' @title
#' Set Sequence Names for PHGMetrics Object
#'
#' @description
#' This method replaces old IDs with new IDs in the `PHGMetrics` object.
#' It ensures that both `metricAlign` and `metricGvcf` fields are updated
#' with the new sequence names provided in the `value` data frame.
#'
#' @param x
#' A `PHGMetrics` object.
#' @param value
#' A \code{data.frame} object containing `old_id` and `new_id` columns for ID
#' replacement.
#'
#' @details
#' The method first validates that the `value` data frame contains the
#' necessary columns (`old_id` and `new_id`). Then, it replaces the old IDs
#' with the new IDs in both `metricAlign` and `metricGvcf` fields of the
#' `PHGMetrics` object. If a replacement ID is not found, the original ID is
#' retained.
#'
#' @return The `PHGMetrics` object with updated sequence names.
#'
#' @examples
#' \dontrun{
#' newIds <- data.frame(
#' old_id = c("old_1", "old_2", "old_3"),
#' new_id = c("new_01", "new_02", "new_03")
#' )
#'
#' # Assume 'met' is a PHGMetrics object
#' seqnames(met) <- newIds
#' }
#'
#' @importFrom GenomeInfoDb seqnames<-
#' @export
setMethod(
f = "seqnames<-",
signature = signature(x = "PHGMetrics"),
definition = function(x, value) {
if (is(value, "data.frame")) {
validIds <- c("old_id", "new_id")
if (any(!validIds %in% colnames(value))) {
rlang::abort("'data.frame' object does not contain correct IDs ('old_id', 'new_id')")
}
} else {
rlang::abort("Only 'data.frame' objects are currently allowed")
}

# Helper function to replace IDs
replaceIds <- function(x, slot_name, field, value) {
metrics <- methods::slot(x, slot_name)
if (is.null(metrics) || length(metrics) == 0) return()
len <- if (is(metrics, "data.frame")) 1 else length(metrics)
for (i in seq_len(len)) {
oldIds <- as.character(metrics[[i]][[field]])
replacements <- stats::setNames(value$new_id, value$old_id)
newIds <- ifelse(oldIds %in% names(replacements), replacements[oldIds], oldIds)
metrics[[i]][[field]] <- newIds
}
methods::slot(x, slot_name) <<- metrics
}

# Replace IDs in metricAlign if it's not NULL
replaceIds(x, "metricAlign", "query_chr", value)

# Replace IDs in metricGvcf if it's not NULL
replaceIds(x, "metricGvcf", "chrom", value)

return(x)
}
)


Loading

0 comments on commit 4e6bca2

Please sign in to comment.