From 18e86b330297e0f04130141e4d0aed354950e04d Mon Sep 17 00:00:00 2001 From: Brandon Date: Wed, 23 Oct 2024 14:43:36 -0500 Subject: [PATCH] Add checks --- DESCRIPTION | 2 +- NEWS.md | 9 +++++ R/class_phg_con_local.R | 40 +++++++++++++++++--- R/class_phg_dataset.R | 2 +- R/init_phg.R | 45 ++++++++++++----------- R/read_hap_ids.R | 4 ++ README.md | 9 ++--- man/PHGLocalCon-class.Rd | 8 ++++ man/PHGLocalCon.Rd | 5 ++- tests/testthat/setup.R | 4 +- tests/testthat/test_class_phg_con_local.R | 15 +++++++- vignettes/rPHG2.Rmd | 9 +++++ 12 files changed, 114 insertions(+), 38 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 14c488a..82cff91 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: rPHG2 Title: R interface to PHGv2 -Version: 0.7 +Version: 0.7.1 Authors@R: person( given = "Brandon", diff --git a/NEWS.md b/NEWS.md index 2665007..d4598b0 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,12 @@ +## rPHG2 0.8 +* Fixed missing IDs returned in `readHapIds()` method calls. + + missing values (i.e., `"null"`) now return as `NA`s in the matrix object +* Removed `rlang::abort()` condition in `initPhg()` + + Now returns "warning" message instead of low-level `stop()` procedure +* Modified the `PHGLocalCon()` constructor to now take in either files or a + directory path + + ## rPHG2 0.7 * Added new parameter to `plotHaploCounts()`: + `geom` diff --git a/R/class_phg_con_local.R b/R/class_phg_con_local.R index 7d3392a..fed4244 100644 --- a/R/class_phg_con_local.R +++ b/R/class_phg_con_local.R @@ -54,18 +54,47 @@ setValidity("PHGLocalCon", function(object) { #' DB data for a given set of PHG-related methods. #' #' @param hVcfFiles -#' A list of \href{https://github.com/maize-genetics/phg_v2/blob/main/docs/hvcf_specifications.md}{hVCF} -#' files as a \code{character} vector +#' A path to a directory or file containing valid \href{https://github.com/maize-genetics/phg_v2/blob/main/docs/hvcf_specifications.md}{hVCF} +#' files (ending in either \code{.h.vcf} or \code{.h.vcf.gz}) as a +#' \code{character} vector #' #' @export PHGLocalCon <- function(hVcfFiles) { - # This is probably overkill right now, but will revisit once the TileDB - # C API is better integrated in PHGv2... + # TODO - This is probably overkill right now, but will revisit once the TileDB + # C API is better integrated in PHGv2... + + # Initial type checking + if (!is.vector(hVcfFiles) || !is.character(hVcfFiles)) { + rlang::abort("Input is not a valid 'character' vector") + } + + # Check if directory or files exist + procFiles <- NULL + hvcfExtPattern <- "\\.h\\.vcf(\\.gz)?$" + if (any(dir.exists(hVcfFiles))) { + # Get all files ending with .h.vcf or .h.vcf.gz in the directory + procFiles <- list.files( + path = hVcfFiles, + pattern = hvcfExtPattern, + full.names = TRUE + ) + + if (length(procFiles) == 0) { + rlang::abort("No files ending with .h.vcf or .h.vcf.gz found in provided directory") + } + } else if (any(file.exists(hVcfFiles))) { + if (any(grepl(hvcfExtPattern, hVcfFiles))) { + procFiles <- hVcfFiles[grepl(hvcfExtPattern, hVcfFiles)] + } + } else { + rlang::abort("The input path is neither a valid directory nor a valid file") + } + methods::new( Class = "PHGLocalCon", phgType = "local", host = NA_character_, # making this NA for now - hVcfFiles = normalizePath(hVcfFiles) + hVcfFiles = normalizePath(procFiles) ) } @@ -93,6 +122,7 @@ setMethod( present <- cli::col_green(cli::symbol$square_small_filled) absent <- cli::col_grey(cli::symbol$square_small) + #' TODO - implement with direct calls to TileDB dbUriStatus <- ifelse(is.na(object@host), absent, present) if (any(is.na(object@hVcfFiles))) { diff --git a/R/class_phg_dataset.R b/R/class_phg_dataset.R index 6651998..25e8a64 100644 --- a/R/class_phg_dataset.R +++ b/R/class_phg_dataset.R @@ -183,7 +183,7 @@ setMethod( uniqHaps <- data.frame( rr_id = colnames(hapIds), - n_haplo = apply(hapIds, 2, function(it) length(unique(it[it != "null"]))) + n_haplo = apply(hapIds, 2, function(it) length(unique(it[!is.na(it)]))) ) return(tibble::as_tibble( diff --git a/R/init_phg.R b/R/init_phg.R index 27b63df..927300b 100644 --- a/R/init_phg.R +++ b/R/init_phg.R @@ -14,6 +14,15 @@ #' #' @export initPhg <- function(phgPath, check = TRUE, verbose = TRUE) { + # Set up CLI message components + # Note: I **know** there are easier ways of accomplishing this but I + # need to do this 'ad-hoc' for JupyterHub notebooks! + cliSuccess <- cli::col_green(cli::symbol$tick) + cliInform <- cli::col_blue(cli::symbol$info) + cliNote <- cli::col_red(cli::symbol$warning) + cliWarn <- cli::col_red(cli::symbol$cross) + cliBullet <- cli::symbol$bullet + if (!is.character(phgPath) || !dir.exists(phgPath)) { rlang::abort("PHG library path ('phgPath') provided does not exist") } @@ -22,33 +31,27 @@ initPhg <- function(phgPath, check = TRUE, verbose = TRUE) { rlang::abort("Cannot find 'phg_v2.jar' in library path") } + # Check if JVM is already initialized - if it is, show warning message and + # move on + loadMsg <- paste0(cliSuccess, " PHG JARs added to class path") if (isJvmInitialized() && "phg_v2.jar" %in% basename(rJava::.jclassPath())) { - rlang::abort("PHGv2 JARs already added to class path") + loadMsg <- paste0(cliWarn, " PHGv2 JARs already added to class path - skipping...") + } else { + tryCatch( + { + rJava::.jinit() + rJava::.jaddClassPath(dir(phgPath, full.names = TRUE)) + }, + error = function(e) { + rlang::abort("Java initialization or class path addition failed", parent = e) + } + ) } - tryCatch( - { - rJava::.jinit() - rJava::.jaddClassPath(dir(phgPath, full.names = TRUE)) - }, - error = function(e) { - rlang::abort("Java initialization or class path addition failed", parent = e) - } - ) - - # Set up CLI message components - # Note: I **know** there are easier ways of accomplishing this but I - # need to do this 'ad-hoc' for JupyterHub notebooks! - cliSuccess <- cli::col_green(cli::symbol$tick) - cliInform <- cli::col_blue(cli::symbol$info) - cliWarn <- cli::col_red(cli::symbol$warning) - cliBullet <- cli::symbol$bullet - # Get version stats jvmStats <- jvmStats() # Get PHG version messages / set up load message - loadMsg <- paste0(cliSuccess, " PHG JARs added to class path") currentPhgVersion <- phgVersion(jvmStats) cliCurrent <- cli::style_bold(currentPhgVersion) phgMsg <- paste0(" ", cliBullet, " PHG version....: ", cliCurrent) @@ -75,7 +78,7 @@ initPhg <- function(phgPath, check = TRUE, verbose = TRUE) { phgWarnMsg <- sprintf( " %s\n %s %sCurrent version of PHGv2 (%s) is out of date (%s)\n %s consider updating (%s)", cli::symbol$line, - cliWarn, + cliNote, cli::style_bold(cli::symbol$sup_1), cliCurrent, cliLatest, diff --git a/R/read_hap_ids.R b/R/read_hap_ids.R index f6c57fc..7b56366 100644 --- a/R/read_hap_ids.R +++ b/R/read_hap_ids.R @@ -13,6 +13,10 @@ hapIdsFromJvmGraph <- function(jvmGraph, nThreads) { jm <- interface$getHapIdMatrixFromGraph(jvmGraph, nThreads) rm <- jm |> kotlin2DArrayToRMatrix() + # Convert missing IDs to R NAs + missingValues <- c("", "null") + rm[rm %in% missingValues] <- NA + return(rm) } diff --git a/README.md b/README.md index d770994..0fe0b04 100644 --- a/README.md +++ b/README.md @@ -7,9 +7,9 @@ `rPHG2` is a system to interact with and retrieve information from **version 2** -of the Practical Haplotype Graph (PHGv2) - a general, graph-based, computational -framework for genotype inference. This is accomplished by leveraging the -[Breeding](https://brapi.org/) and +of the [Practical Haplotype Graph (PHGv2)](https://phg.maizegenetics.net/) - a +general, graph-based, computational framework for genotype inference. This is +accomplished by leveraging the [Breeding](https://brapi.org/) and [PHG](https://github.com/maize-genetics/phg_v2) APIs. ## Installation @@ -30,8 +30,7 @@ initPhg("my/phg/jar/path") # Create a connection -locCon <- list.files("my/hvcf/dir") |> - PHGLocalCon() +locCon <- PHGLocalCon("my/hvcf/dir") # Build a graph diff --git a/man/PHGLocalCon-class.Rd b/man/PHGLocalCon-class.Rd index b3c8910..9ad6030 100644 --- a/man/PHGLocalCon-class.Rd +++ b/man/PHGLocalCon-class.Rd @@ -4,6 +4,14 @@ \name{PHGLocalCon-class} \alias{PHGLocalCon-class} \alias{show,PHGLocalCon-method} +\alias{TODO} +\alias{-} +\alias{implement} +\alias{with} +\alias{direct} +\alias{calls} +\alias{to} +\alias{TileDB} \title{A PHGLocalCon Class} \usage{ \S4method{show}{PHGLocalCon}(object) diff --git a/man/PHGLocalCon.Rd b/man/PHGLocalCon.Rd index 63df54a..54aae7d 100644 --- a/man/PHGLocalCon.Rd +++ b/man/PHGLocalCon.Rd @@ -7,8 +7,9 @@ PHGLocalCon(hVcfFiles) } \arguments{ -\item{hVcfFiles}{A list of \href{https://github.com/maize-genetics/phg_v2/blob/main/docs/hvcf_specifications.md}{hVCF} -files as a \code{character} vector} +\item{hVcfFiles}{A path to a directory or file containing valid \href{https://github.com/maize-genetics/phg_v2/blob/main/docs/hvcf_specifications.md}{hVCF} +files (ending in either \code{.h.vcf} or \code{.h.vcf.gz}) as a +\code{character} vector} } \description{ Creates a \code{\linkS4class{PHGLocalCon}} object to be used to read PHG diff --git a/tests/testthat/setup.R b/tests/testthat/setup.R index 2391925..7eaf9a8 100644 --- a/tests/testthat/setup.R +++ b/tests/testthat/setup.R @@ -112,7 +112,9 @@ initPhg(phgLibPath) ## Test post-initialization ---- testthat::test_that("JVM init checker works", { - testthat::expect_error(rPHG2::initPhg(phgLibPath)) + msg <- utils::capture.output(rPHG2::initPhg(phgLibPath)) + + testthat::expect_match(msg[1], "PHGv2 JARs already added to class path" ) }) diff --git a/tests/testthat/test_class_phg_con_local.R b/tests/testthat/test_class_phg_con_local.R index b5ba341..fdd5298 100644 --- a/tests/testthat/test_class_phg_con_local.R +++ b/tests/testthat/test_class_phg_con_local.R @@ -1,11 +1,22 @@ -test_that("Local connection tests", { +test_that("General local connection tests", { hVcfFileDir <- system.file("extdata", package = "rPHG2") hVcfFiles <- list.files(hVcfFileDir, pattern = ".h.vcf$", full.names = TRUE) locCon <- PHGLocalCon(hVcfFiles) - locConOutput <- utils::capture.output(locCon) + expect_error(PHGLocalCon(mtcars)) + expect_error(PHGLocalCon(tempdir())) + expect_error(PHGLocalCon(tempfile())) + locConOutput <- utils::capture.output(locCon) expect_equal(length(locConOutput), 3) }) + +test_that("Local connection from directory", { + hVcfFileDir <- system.file("extdata", package = "rPHG2") + locCon <- PHGLocalCon(hVcfFileDir) + locConOutput <- utils::capture.output(locCon) + expect_equal(length(locConOutput), 3) + +}) diff --git a/vignettes/rPHG2.Rmd b/vignettes/rPHG2.Rmd index 0409f86..537498a 100644 --- a/vignettes/rPHG2.Rmd +++ b/vignettes/rPHG2.Rmd @@ -94,6 +94,15 @@ localCon <- hVcfFiles |> PHGLocalCon() localCon ``` +Additionally, if you have a **directory** of valid hVCF files, you can simply +pass the directory path to the constructor. In the following example, I will +point to the `extdata` data directory _without_ pointing to singular hVCF files +that is found within the package: + +```{r, eval=TRUE, echo=TRUE} +system.file("extdata", package = "rPHG2") |> PHGLocalCon() +``` + From here, we can move to the [next section](#creating-jvm-objects) to create a `HaplotypeGraph` object interface with the JVM.