Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add geom parameter to plotHaploCounts() #11

Merged
merged 2 commits into from
Aug 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: rPHG2
Title: R interface to PHGv2
Version: 0.6
Version: 0.6.1
Authors@R:
person(
given = "Brandon",
Expand Down Expand Up @@ -41,6 +41,7 @@ Imports:
patchwork,
rJava,
rlang,
S4Vectors,
scales,
tibble,
tools,
Expand Down
6 changes: 6 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
## rPHG2 0.7
* Added new parameter to `plotHaploCounts()`:
+ `geom`
+ allows user to specify a given geometry (line, bar, or point)


## rPHG2 0.6
* Added new parameters to `plogtGvcf()`:
+ `mData` and `mVar`
Expand Down
93 changes: 30 additions & 63 deletions R/class_phg_dataset.R
Original file line number Diff line number Diff line change
Expand Up @@ -228,75 +228,42 @@ setMethod(

## ----
#' @param gr
#' A \code{GenomicRanges} object for subsetting based on chromosome ID and
#' start/stop positions (bp). If \code{NULL}, all haplotype counts will be
#' plotted similar to a Manhattan plot. Defaults to \code{NULL}.
#' A \code{GRanges} object specifying a genomic range for filtering the
#' haplotype data. If \code{NULL}, the function will plot the data across the
#' entire dataset.
#' @param geom
#' A character string specifying the type of geometric representation for the
#' plot. Accepted values are:
#' \itemize{
#' \item \code{"l"} for lines (default),
#' \item \code{"b"} for bars,
#' \item \code{"p"} for points.
#' }
#' If an invalid value is provided, the function will raise an error.
#'
#' @details
#' When no genomic range is provided (i.e., \code{gr = NULL}), the function
#' plots the number of unique haplotypes across the entire reference genome or
#' dataset. This will default to point geometry regardless of the value
#' provided within the \code{geom} parameter. If a genomic range is provided,
#' it filters the data based on overlaps between the reference ranges in the
#' dataset and the query range. In both cases, the resulting plot uses
#' \code{ggplot2} for visualization, and different geometries can be selected
#' via the \code{geom} parameter.
#'
#' @return A \code{ggplot} object visualizing the haplotype counts. When
#' \code{gr} is \code{NULL}, the plot shows the number of unique haplotypes
#' across reference positions. When \code{gr} is provided, the plot is filtered
#' to display haplotype counts within the specified range.
#'
#'
#' @rdname plotHaploCounts
#' @export
setMethod(
f = "plotHaploCounts",
signature = signature(object = "PHGDataSet"),
definition = function(object, gr = NULL) {
nHaplo <- numberOfHaplotypes(object, byRefRange = TRUE)
p <- NULL
if (is.null(gr)) {
p <- ggplot2::ggplot(nHaplo) +
ggplot2::aes(x = !!rlang::sym("start"), y = !!rlang::sym("n_haplo")) +
ggplot2::geom_point() +
ggplot2::scale_y_continuous(
breaks = seq_len(max(nHaplo$n_haplo)),
limits = c(1, max(nHaplo$n_haplo))
) +
ggplot2::scale_x_continuous(
labels = scales::label_number(
scale_cut = scales::cut_short_scale()
)
) +
ggplot2::xlab("Position (bp)") +
ggplot2::ylab("Number of unique haplotypes") +
ggplot2::facet_wrap(~ seqnames, scales = "free_x") +
ggplot2::theme_bw()
} else {
refRanges <- readRefRanges(object)
if (!is(gr, "GRanges")) {
rlang::abort("'gr' object is not of type 'GRanges'")
}

gr$sub_id <- paste0("QR ", GenomeInfoDb::seqnames(gr), ":", IRanges::ranges(gr))

# Find overlaps
overlaps <- suppressWarnings(GenomicRanges::findOverlaps(refRanges, gr))
if (length(overlaps) == 0) {
rlang::abort("No reference ranges identified with given query")
}

# Filter based on overlaps
filtGr <- refRanges[S4Vectors::queryHits(overlaps)]

# Add sub_id metadata
filtGr$sub_id <- gr$sub_id[S4Vectors::subjectHits(overlaps)]
filtGrDf <- as.data.frame(filtGr)
filtGrDf <- merge(x = filtGrDf, y = nHaplo)

p <- ggplot2::ggplot(filtGrDf) +
ggplot2::aes(x = !!rlang::sym("rr_id"), y = !!rlang::sym("n_haplo")) +
ggplot2::geom_bar(stat = "identity") +
ggplot2::scale_y_continuous(breaks = seq(0, max(nHaplo$n_haplo), by = 1)) +
ggplot2::xlab("Reference range ID") +
ggplot2::ylab("Number of unique haplotypes") +
ggplot2::facet_grid(~ sub_id, scales = "free_x", space = "free") +
ggplot2::theme_bw() +
ggplot2::theme(
axis.text.x = ggplot2::element_text(
angle = 90,
vjust = 0.5,
hjust = 1
)
)
}

return(p)
definition = function(object, gr = NULL, geom = "l") {
plotHaploFromPhgDataSet(object, gr, geom)
}
)

Expand Down
1 change: 1 addition & 0 deletions R/vis_plot_dot.R
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
## ----
# Plot a Dot Plot from AnchorWave metrics
#
# @param df
Expand Down
131 changes: 131 additions & 0 deletions R/vis_plot_haplo.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
## ----
# Helper function to construct common ggplot theme
custHapTheme <- function() {
ggplot2::theme_bw() +
ggplot2::theme(
axis.text.x = ggplot2::element_text(
angle = 90,
vjust = 0.5,
hjust = 1
)
)
}


## ----
# Helper function to construct the geometry based on the 'geom' parameter
selectGeom <- function(geom) {
switch(geom,
"l" = ggplot2::geom_line(ggplot2::aes(group = 1)),
"b" = ggplot2::geom_bar(stat = "identity"),
"p" = ggplot2::geom_point()
)
}


## ----
# Plot Haplotype Data from PHG Dataset
#
# @description
# This function creates a ggplot visualization of haplotype data from a PHG
# (Practical Haplotype Graph) dataset, allowing for multiple geometric
# representations (lines, bars, points). The plot can either cover the entire
# dataset or a specific genomic range provided by the user.
#
# @param object
# A PHG dataset object containing haplotype information. This is typically the
# output of a function like `loadPhgDataSet()` or similar.
# @param gr
# A \code{GRanges} object specifying a genomic range for filtering the
# haplotype data. If \code{NULL}, the function will plot the data across the
# entire dataset.
# @param geom
# A character string specifying the type of geometric representation for the
# plot. Accepted values are:
# \itemize{
# \item \code{"l"} for lines (default),
# \item \code{"b"} for bars,
# \item \code{"p"} for points.
# }
# If an invalid value is provided, the function will raise an error.
#
# @details
# When no genomic range is provided (i.e., \code{gr = NULL}), the function
# plots the number of unique haplotypes across the entire reference genome or
# dataset. If a genomic range is provided, it filters the data based on
# overlaps between the reference ranges in the dataset and the query range. In
# both cases, the resulting plot uses \code{ggplot2} for visualization, and
# different geometries can be selected via the \code{geom} parameter.
#
# @return A \code{ggplot} object visualizing the haplotype counts. When
# \code{gr} is \code{NULL}, the plot shows the number of unique haplotypes
# across reference positions. When \code{gr} is provided, the plot is filtered
# to display haplotype counts within the specified range.
plotHaploFromPhgDataSet <- function(object, gr = NULL, geom = "l") {
# Validate 'geom' parameter
if (!geom %in% c("l", "b", "p")) {
rlang::abort("'geom' must be one of 'l' (line), 'b' (bar), or 'p' (point).")
}

nHaplo <- numberOfHaplotypes(object, byRefRange = TRUE)

# Plot when 'gr' is NULL
if (is.null(gr)) {
return(
ggplot2::ggplot(nHaplo) +
ggplot2::aes(
x = !!rlang::sym("start"),
y = !!rlang::sym("n_haplo"),
alpha = 0.01
) +
selectGeom("p") +
ggplot2::scale_y_continuous(
breaks = seq_len(max(nHaplo$n_haplo)),
limits = c(1, max(nHaplo$n_haplo))
) +
ggplot2::scale_x_continuous(
labels = scales::label_number(
scale_cut = scales::cut_short_scale()
)
) +
ggplot2::xlab("Position (bp)") +
ggplot2::ylab("Number of unique haplotypes") +
ggplot2::facet_wrap(~ seqnames, scales = "free_x") +
ggplot2::guides(alpha = "none") +
custHapTheme()
)
}

# Validate 'gr' type
if (!is(gr, "GRanges")) {
rlang::abort("'gr' object is not of type 'GRanges'")
}

# Get ref ranges GRanges object and add 'sub_id" parameters
refRanges <- readRefRanges(object)
gr$sub_id <- paste0("QR ", GenomeInfoDb::seqnames(gr), ":", IRanges::ranges(gr))

# Find overlaps
overlaps <- suppressWarnings(GenomicRanges::findOverlaps(refRanges, gr))

if (length(overlaps) == 0) {
rlang::abort("No reference ranges identified with given query")
}

# Filter and merge data
filtGr <- refRanges[S4Vectors::queryHits(overlaps)]
filtGr$sub_id <- gr$sub_id[S4Vectors::subjectHits(overlaps)]
filtGrDf <- merge(as.data.frame(filtGr), nHaplo)

# Plot with filtered data
ggplot2::ggplot(filtGrDf) +
ggplot2::aes(x = !!rlang::sym("rr_id"), y = !!rlang::sym("n_haplo")) +
selectGeom(geom) +
ggplot2::scale_y_continuous(breaks = seq(0, max(nHaplo$n_haplo), by = 1)) +
ggplot2::xlab("Reference range ID") +
ggplot2::ylab("Number of unique haplotypes") +
ggplot2::facet_grid(~ sub_id, scales = "free_x", space = "free") +
custHapTheme()
}


33 changes: 29 additions & 4 deletions man/plotHaploCounts.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

22 changes: 22 additions & 0 deletions tests/testthat/test_class_phg_dataset.R
Original file line number Diff line number Diff line change
Expand Up @@ -69,9 +69,31 @@ test_that("PHGDataSet class construction tests", {
### expect 4 observations to overlap with successful query
expect_equal(length(testPlot$data$rr_id), 4)

## Geometry ('geom = ') tests
containsGeom <- function(plot, geomType) {
any(
sapply(plot$layers, function(layer) {
inherits(layer$geom, geomType)
})
)
}
expect_true(containsGeom(plotHaploCounts(pds, geom = "p"), "GeomPoint"))
expect_true(containsGeom(plotHaploCounts(pds, geom = "l"), "GeomPoint"))
expect_true(containsGeom(plotHaploCounts(pds, geom = "b"), "GeomPoint"))
expect_true(containsGeom(plotHaploCounts(pds, gr = grQuery, geom = "l"), "GeomLine"))
expect_false(containsGeom(plotHaploCounts(pds, gr = grQuery, geom = "l"), "GeomBar"))
expect_false(containsGeom(plotHaploCounts(pds, gr = grQuery, geom = "l"), "GeomPoint"))
expect_true(containsGeom(plotHaploCounts(pds, gr = grQuery, geom = "b"), "GeomBar"))
expect_false(containsGeom(plotHaploCounts(pds, gr = grQuery, geom = "b"), "GeomLine"))
expect_false(containsGeom(plotHaploCounts(pds, gr = grQuery, geom = "b"), "GeomPoint"))
expect_true(containsGeom(plotHaploCounts(pds, gr = grQuery, geom = "p"), "GeomPoint"))
expect_false(containsGeom(plotHaploCounts(pds, gr = grQuery, geom = "p"), "GeomBar"))
expect_false(containsGeom(plotHaploCounts(pds, gr = grQuery, geom = "p"), "GeomLine"))

## General errors
expect_error(plotHaploCounts(pds, gr = mtcars))
expect_error(plotHaploCounts(pds, gr = grQueryError))
expect_error(plotHaploCounts(pds, geom = "x"))
})


7 changes: 5 additions & 2 deletions tests/testthat/test_utils_metrics.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,13 @@ test_that("General metrics utility tests", {
# validateHeaders()
x <- "some\tline\tto\ttest"
expect_true(
rPHG2:::validateHeaders(x, c("some", "line", "to", "test"))
validateHeaders(x, c("some", "line", "to", "test"))
)
expect_false(
rPHG2:::validateHeaders(NULL, c("some", "line", "to", "test"))
validateHeaders(NULL, c("some", "line", "to", "test"))
)
expect_false(
validateHeaders(NULL, c("some", "line", "to", "ttest"))
)
})

Expand Down
Loading
Loading