Skip to content

Commit

Permalink
Merge pull request #11 from maize-genetics/doc-fix
Browse files Browse the repository at this point in the history
Add `geom` parameter to `plotHaploCounts()`
  • Loading branch information
btmonier authored Aug 15, 2024
2 parents 10b1470 + f305eae commit 2daca22
Show file tree
Hide file tree
Showing 10 changed files with 244 additions and 86 deletions.
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: rPHG2
Title: R interface to PHGv2
Version: 0.6
Version: 0.6.1
Authors@R:
person(
given = "Brandon",
Expand Down Expand Up @@ -41,6 +41,7 @@ Imports:
patchwork,
rJava,
rlang,
S4Vectors,
scales,
tibble,
tools,
Expand Down
6 changes: 6 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
## rPHG2 0.7
* Added new parameter to `plotHaploCounts()`:
+ `geom`
+ allows user to specify a given geometry (line, bar, or point)


## rPHG2 0.6
* Added new parameters to `plogtGvcf()`:
+ `mData` and `mVar`
Expand Down
93 changes: 30 additions & 63 deletions R/class_phg_dataset.R
Original file line number Diff line number Diff line change
Expand Up @@ -228,75 +228,42 @@ setMethod(

## ----
#' @param gr
#' A \code{GenomicRanges} object for subsetting based on chromosome ID and
#' start/stop positions (bp). If \code{NULL}, all haplotype counts will be
#' plotted similar to a Manhattan plot. Defaults to \code{NULL}.
#' A \code{GRanges} object specifying a genomic range for filtering the
#' haplotype data. If \code{NULL}, the function will plot the data across the
#' entire dataset.
#' @param geom
#' A character string specifying the type of geometric representation for the
#' plot. Accepted values are:
#' \itemize{
#' \item \code{"l"} for lines (default),
#' \item \code{"b"} for bars,
#' \item \code{"p"} for points.
#' }
#' If an invalid value is provided, the function will raise an error.
#'
#' @details
#' When no genomic range is provided (i.e., \code{gr = NULL}), the function
#' plots the number of unique haplotypes across the entire reference genome or
#' dataset. This will default to point geometry regardless of the value
#' provided within the \code{geom} parameter. If a genomic range is provided,
#' it filters the data based on overlaps between the reference ranges in the
#' dataset and the query range. In both cases, the resulting plot uses
#' \code{ggplot2} for visualization, and different geometries can be selected
#' via the \code{geom} parameter.
#'
#' @return A \code{ggplot} object visualizing the haplotype counts. When
#' \code{gr} is \code{NULL}, the plot shows the number of unique haplotypes
#' across reference positions. When \code{gr} is provided, the plot is filtered
#' to display haplotype counts within the specified range.
#'
#'
#' @rdname plotHaploCounts
#' @export
setMethod(
f = "plotHaploCounts",
signature = signature(object = "PHGDataSet"),
definition = function(object, gr = NULL) {
nHaplo <- numberOfHaplotypes(object, byRefRange = TRUE)
p <- NULL
if (is.null(gr)) {
p <- ggplot2::ggplot(nHaplo) +
ggplot2::aes(x = !!rlang::sym("start"), y = !!rlang::sym("n_haplo")) +
ggplot2::geom_point() +
ggplot2::scale_y_continuous(
breaks = seq_len(max(nHaplo$n_haplo)),
limits = c(1, max(nHaplo$n_haplo))
) +
ggplot2::scale_x_continuous(
labels = scales::label_number(
scale_cut = scales::cut_short_scale()
)
) +
ggplot2::xlab("Position (bp)") +
ggplot2::ylab("Number of unique haplotypes") +
ggplot2::facet_wrap(~ seqnames, scales = "free_x") +
ggplot2::theme_bw()
} else {
refRanges <- readRefRanges(object)
if (!is(gr, "GRanges")) {
rlang::abort("'gr' object is not of type 'GRanges'")
}

gr$sub_id <- paste0("QR ", GenomeInfoDb::seqnames(gr), ":", IRanges::ranges(gr))

# Find overlaps
overlaps <- suppressWarnings(GenomicRanges::findOverlaps(refRanges, gr))
if (length(overlaps) == 0) {
rlang::abort("No reference ranges identified with given query")
}

# Filter based on overlaps
filtGr <- refRanges[S4Vectors::queryHits(overlaps)]

# Add sub_id metadata
filtGr$sub_id <- gr$sub_id[S4Vectors::subjectHits(overlaps)]
filtGrDf <- as.data.frame(filtGr)
filtGrDf <- merge(x = filtGrDf, y = nHaplo)

p <- ggplot2::ggplot(filtGrDf) +
ggplot2::aes(x = !!rlang::sym("rr_id"), y = !!rlang::sym("n_haplo")) +
ggplot2::geom_bar(stat = "identity") +
ggplot2::scale_y_continuous(breaks = seq(0, max(nHaplo$n_haplo), by = 1)) +
ggplot2::xlab("Reference range ID") +
ggplot2::ylab("Number of unique haplotypes") +
ggplot2::facet_grid(~ sub_id, scales = "free_x", space = "free") +
ggplot2::theme_bw() +
ggplot2::theme(
axis.text.x = ggplot2::element_text(
angle = 90,
vjust = 0.5,
hjust = 1
)
)
}

return(p)
definition = function(object, gr = NULL, geom = "l") {
plotHaploFromPhgDataSet(object, gr, geom)
}
)

Expand Down
1 change: 1 addition & 0 deletions R/vis_plot_dot.R
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
## ----
# Plot a Dot Plot from AnchorWave metrics
#
# @param df
Expand Down
131 changes: 131 additions & 0 deletions R/vis_plot_haplo.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
## ----
# Helper function to construct common ggplot theme
custHapTheme <- function() {
ggplot2::theme_bw() +
ggplot2::theme(
axis.text.x = ggplot2::element_text(
angle = 90,
vjust = 0.5,
hjust = 1
)
)
}


## ----
# Helper function to construct the geometry based on the 'geom' parameter
selectGeom <- function(geom) {
switch(geom,
"l" = ggplot2::geom_line(ggplot2::aes(group = 1)),
"b" = ggplot2::geom_bar(stat = "identity"),
"p" = ggplot2::geom_point()
)
}


## ----
# Plot Haplotype Data from PHG Dataset
#
# @description
# This function creates a ggplot visualization of haplotype data from a PHG
# (Practical Haplotype Graph) dataset, allowing for multiple geometric
# representations (lines, bars, points). The plot can either cover the entire
# dataset or a specific genomic range provided by the user.
#
# @param object
# A PHG dataset object containing haplotype information. This is typically the
# output of a function like `loadPhgDataSet()` or similar.
# @param gr
# A \code{GRanges} object specifying a genomic range for filtering the
# haplotype data. If \code{NULL}, the function will plot the data across the
# entire dataset.
# @param geom
# A character string specifying the type of geometric representation for the
# plot. Accepted values are:
# \itemize{
# \item \code{"l"} for lines (default),
# \item \code{"b"} for bars,
# \item \code{"p"} for points.
# }
# If an invalid value is provided, the function will raise an error.
#
# @details
# When no genomic range is provided (i.e., \code{gr = NULL}), the function
# plots the number of unique haplotypes across the entire reference genome or
# dataset. If a genomic range is provided, it filters the data based on
# overlaps between the reference ranges in the dataset and the query range. In
# both cases, the resulting plot uses \code{ggplot2} for visualization, and
# different geometries can be selected via the \code{geom} parameter.
#
# @return A \code{ggplot} object visualizing the haplotype counts. When
# \code{gr} is \code{NULL}, the plot shows the number of unique haplotypes
# across reference positions. When \code{gr} is provided, the plot is filtered
# to display haplotype counts within the specified range.
plotHaploFromPhgDataSet <- function(object, gr = NULL, geom = "l") {
# Validate 'geom' parameter
if (!geom %in% c("l", "b", "p")) {
rlang::abort("'geom' must be one of 'l' (line), 'b' (bar), or 'p' (point).")
}

nHaplo <- numberOfHaplotypes(object, byRefRange = TRUE)

# Plot when 'gr' is NULL
if (is.null(gr)) {
return(
ggplot2::ggplot(nHaplo) +
ggplot2::aes(
x = !!rlang::sym("start"),
y = !!rlang::sym("n_haplo"),
alpha = 0.01
) +
selectGeom("p") +
ggplot2::scale_y_continuous(
breaks = seq_len(max(nHaplo$n_haplo)),
limits = c(1, max(nHaplo$n_haplo))
) +
ggplot2::scale_x_continuous(
labels = scales::label_number(
scale_cut = scales::cut_short_scale()
)
) +
ggplot2::xlab("Position (bp)") +
ggplot2::ylab("Number of unique haplotypes") +
ggplot2::facet_wrap(~ seqnames, scales = "free_x") +
ggplot2::guides(alpha = "none") +
custHapTheme()
)
}

# Validate 'gr' type
if (!is(gr, "GRanges")) {
rlang::abort("'gr' object is not of type 'GRanges'")
}

# Get ref ranges GRanges object and add 'sub_id" parameters
refRanges <- readRefRanges(object)
gr$sub_id <- paste0("QR ", GenomeInfoDb::seqnames(gr), ":", IRanges::ranges(gr))

# Find overlaps
overlaps <- suppressWarnings(GenomicRanges::findOverlaps(refRanges, gr))

if (length(overlaps) == 0) {
rlang::abort("No reference ranges identified with given query")
}

# Filter and merge data
filtGr <- refRanges[S4Vectors::queryHits(overlaps)]
filtGr$sub_id <- gr$sub_id[S4Vectors::subjectHits(overlaps)]
filtGrDf <- merge(as.data.frame(filtGr), nHaplo)

# Plot with filtered data
ggplot2::ggplot(filtGrDf) +
ggplot2::aes(x = !!rlang::sym("rr_id"), y = !!rlang::sym("n_haplo")) +
selectGeom(geom) +
ggplot2::scale_y_continuous(breaks = seq(0, max(nHaplo$n_haplo), by = 1)) +
ggplot2::xlab("Reference range ID") +
ggplot2::ylab("Number of unique haplotypes") +
ggplot2::facet_grid(~ sub_id, scales = "free_x", space = "free") +
custHapTheme()
}


33 changes: 29 additions & 4 deletions man/plotHaploCounts.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

22 changes: 22 additions & 0 deletions tests/testthat/test_class_phg_dataset.R
Original file line number Diff line number Diff line change
Expand Up @@ -69,9 +69,31 @@ test_that("PHGDataSet class construction tests", {
### expect 4 observations to overlap with successful query
expect_equal(length(testPlot$data$rr_id), 4)

## Geometry ('geom = ') tests
containsGeom <- function(plot, geomType) {
any(
sapply(plot$layers, function(layer) {
inherits(layer$geom, geomType)
})
)
}
expect_true(containsGeom(plotHaploCounts(pds, geom = "p"), "GeomPoint"))
expect_true(containsGeom(plotHaploCounts(pds, geom = "l"), "GeomPoint"))
expect_true(containsGeom(plotHaploCounts(pds, geom = "b"), "GeomPoint"))
expect_true(containsGeom(plotHaploCounts(pds, gr = grQuery, geom = "l"), "GeomLine"))
expect_false(containsGeom(plotHaploCounts(pds, gr = grQuery, geom = "l"), "GeomBar"))
expect_false(containsGeom(plotHaploCounts(pds, gr = grQuery, geom = "l"), "GeomPoint"))
expect_true(containsGeom(plotHaploCounts(pds, gr = grQuery, geom = "b"), "GeomBar"))
expect_false(containsGeom(plotHaploCounts(pds, gr = grQuery, geom = "b"), "GeomLine"))
expect_false(containsGeom(plotHaploCounts(pds, gr = grQuery, geom = "b"), "GeomPoint"))
expect_true(containsGeom(plotHaploCounts(pds, gr = grQuery, geom = "p"), "GeomPoint"))
expect_false(containsGeom(plotHaploCounts(pds, gr = grQuery, geom = "p"), "GeomBar"))
expect_false(containsGeom(plotHaploCounts(pds, gr = grQuery, geom = "p"), "GeomLine"))

## General errors
expect_error(plotHaploCounts(pds, gr = mtcars))
expect_error(plotHaploCounts(pds, gr = grQueryError))
expect_error(plotHaploCounts(pds, geom = "x"))
})


7 changes: 5 additions & 2 deletions tests/testthat/test_utils_metrics.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,13 @@ test_that("General metrics utility tests", {
# validateHeaders()
x <- "some\tline\tto\ttest"
expect_true(
rPHG2:::validateHeaders(x, c("some", "line", "to", "test"))
validateHeaders(x, c("some", "line", "to", "test"))
)
expect_false(
rPHG2:::validateHeaders(NULL, c("some", "line", "to", "test"))
validateHeaders(NULL, c("some", "line", "to", "test"))
)
expect_false(
validateHeaders(NULL, c("some", "line", "to", "ttest"))
)
})

Expand Down
Loading

0 comments on commit 2daca22

Please sign in to comment.