Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Gvcf metrics #3

Merged
merged 7 commits into from
Nov 8, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
Package: rPHG
Version: 0.2.0
Version: 0.2.1
Date: 2019-06-03
Title: R front-end for the practical haplotype graph
Authors@R: c(
Expand Down
12 changes: 12 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ export(calcMutualInfo)
export(configFilePath)
export(dbName)
export(dbType)
export(gvcfMetrics)
export(host)
export(httProtocol)
export(isDemo)
Expand All @@ -24,6 +25,7 @@ export(phgConObj)
export(phgMethodId)
export(phgMethodType)
export(phgType)
export(plotDot)
export(plotGraph)
export(plotMutualInfo)
export(port)
Expand Down Expand Up @@ -72,5 +74,15 @@ exportMethods(serverInfo)
exportMethods(showPHGMethods)
exportMethods(taxaByNode)
importFrom(curl,has_internet)
importFrom(ggplot2,aes)
importFrom(ggplot2,facet_grid)
importFrom(ggplot2,geom_point)
importFrom(ggplot2,ggplot)
importFrom(ggplot2,scale_color_viridis_c)
importFrom(ggplot2,scale_x_continuous)
importFrom(ggplot2,scale_y_continuous)
importFrom(ggplot2,xlab)
importFrom(ggplot2,ylab)
importFrom(methods,setClass)
importFrom(rJava,.jnew)
importFrom(tibble,as_tibble)
7 changes: 7 additions & 0 deletions NEWS
Original file line number Diff line number Diff line change
@@ -1,3 +1,10 @@
## CHANGES IN VERSION 0.2.1
* Added new function, `plotDot()`
+ Creates dot plots from `.anchorspro` files
* Added new function, `gvcfMetrics()`
+ Creates summary metrics for a directory of gVCF files


## CHANGES IN VERSION 0.2.0
* Unified workflow for both local and server instances
* Added new class, `PHGServerCon`
Expand Down
7 changes: 7 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,10 @@
## rPHG 0.2.1
* Added new function, `plotDot()`
+ Creates dot plots from `.anchorspro` files
* Added new function, `gvcfMetrics()`
+ Creates summary metrics for a directory of gVCF files


## rPHG 0.2.0
* Unified workflow for both local and server instances
* Added new class, `PHGServerCon`
Expand Down
46 changes: 46 additions & 0 deletions R/stats_gvcf_metrics.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
## ----
#' @title
#' Generate metrics for GVCF data
#'
#' @param gvcfDir
#' Directory containing GVCF file(s) to process.
#' @param indelReport
#' Do you want to generate an indel size report? Defaults to \code{FALSE}.
#'
#' @importFrom rJava .jnew
#'
#' @export
gvcfMetrics <- function(gvcfDir, indelReport = FALSE) {
rJC <- rJava::.jnew("net.maizegenetics.pangenome.hapCalling.VCFMetricsPlugin")

myVcfStatFile <- tempfile(".tsv")
myIndelStatFile <- tempfile(".tsv")

rJC$vcfDir(gvcfDir)
rJC$outFile(myVcfStatFile)

if (indelReport) {
rJC$indelFile(myIndelStatFile)
}

rJC$run()

vcfStatsDf <- utils::read.table(myVcfStatFile, header = TRUE)

if (indelReport) {
indelStatsDf <- utils::read.table(myIndelStatFile, header = FALSE, fill = TRUE)
}

if (indelReport) {
return(
list(
"gvcf_stats" = vcfStatsDf,
"indel_stats" = indelStatsDf
)
)
} else {
return(vcfStatsDf)
}
}


90 changes: 90 additions & 0 deletions R/vis_plot_dot.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
## ----
#' @title
#' Generate dot plot from Anchor files
#'
#' @description
#' This function will generate a dot plot (collinearity) from an Anchor
#' File generated from the program
#' [AnchorWave](https://github.com/baoxingsong/AnchorWave). This is one of the
#' preliminary steps in creating a PHG database.
#'
#' @param anchorPath
#' Path to anchor file.
#' @param querySeqId
#' Vector of sequence IDs (query).
#' @param refSeqId
#' Vector of sequence IDs (reference).
#' @param queryLab
#' Optional label for query axis.
#' @param refLab
#' Optional label for reference axis.
#' @param colorId
#' How to color plots (\code{strand} or \code{score})
#'
#' @importFrom ggplot2 aes
#' @importFrom ggplot2 facet_grid
#' @importFrom ggplot2 geom_point
#' @importFrom ggplot2 ggplot
#' @importFrom ggplot2 scale_color_viridis_c
#' @importFrom ggplot2 scale_x_continuous
#' @importFrom ggplot2 scale_y_continuous
#' @importFrom ggplot2 xlab
#' @importFrom ggplot2 ylab
#'
#' @export
plotDot <- function(
anchorPath,
querySeqId = NULL,
refSeqId = NULL,
queryLab = NULL,
refLab = NULL,
colorId = c("strand", "score")
) {

if (is.null(queryLab)) queryLab <- "Query"
if (is.null(refLab)) refLab <- "Reference"

colorId <- match.arg(colorId)
if (colorId == "score") {
scaleUnit <- ggplot2::scale_color_viridis_c()
} else if (colorId == "strand") {
scaleUnit <- ggplot2::scale_color_manual(
values = c(
"+" = "#DA897C",
"-" = "#0D6A82"
)
)
} else {
scaleUnit <- NULL
}

tmpData <- utils::read.table(file = anchorPath, head = TRUE)

if (!is.null(refSeqId)) tmpData <- tmpData[which(tmpData$refChr %in% refSeqId), ]
if (!is.null(querySeqId)) tmpData <- tmpData[which(tmpData$queryChr %in% querySeqId), ]

toMb <- function(x) x / 1e6

p <- ggplot2::ggplot(data = tmpData) +
ggplot2::aes(
x = rlang::.data[["queryStart"]],
y = rlang::.data[["referenceStart"]],
color = rlang::.data[[colorId]]
) +
ggplot2::geom_point(size = 0.3) +
ggplot2::scale_y_continuous(labels = toMb) +
ggplot2::scale_x_continuous(labels = toMb) +
ggplot2::facet_grid(
rows = ggplot2::vars(rlang::.data[["refChr"]]),
col = ggplot2::vars(rlang::.data[["queryChr"]]),
scales = "free",
space = "free"
) +
scaleUnit +
ggplot2::xlab(paste(queryLab, "(Mbp)")) +
ggplot2::ylab(paste(refLab, "(Mbp)"))

return(p)
}


Loading