From dc2a11eceaaed2ababb9ba153df19c7edbda46f4 Mon Sep 17 00:00:00 2001 From: "Win Cowger, PhD" Date: Fri, 8 Dec 2023 15:51:42 -0800 Subject: [PATCH] seems to be working on extended example cases too --- R/match_spec.R | 23 +++++++++++++++++++++-- man/match_spec.Rd | 16 ++++++++++++++-- tests/testthat/test-match_spec.R | 12 ++++++++++++ 3 files changed, 47 insertions(+), 4 deletions(-) diff --git a/R/match_spec.R b/R/match_spec.R index 16a616a9..aab42e3c 100644 --- a/R/match_spec.R +++ b/R/match_spec.R @@ -12,7 +12,7 @@ #' \code{max_cor_named()} formats the top correlation values from a correlation #' matrix as a named vector. #' \code{filter_spec()} filters an Open Specy object. -#' \code{os_similarity()} EXPERIMENTAL, returns a single similarity metric between two OpenSpecy objects based on the method. +#' \code{os_similarity()} EXPERIMENTAL, returns a single similarity metric between two OpenSpecy objects based on the method used. #' @param x an \code{OpenSpecy} object, typically with unknowns. #' @param conform Whether to conform the spectra to the library wavenumbers or not. #' @param type the type of conformation to make returned by \code{conform_spec()} @@ -55,7 +55,19 @@ #' \code{cor_spec()} returns a correlation matrix. #' \code{get_metadata()} returns a \code{\link[data.table]{data.table-class}()} #' with the metadata for columns which have information. -#' \code{os_similarity()} returns a single numeric value representing the type of similarity metric requested. +#' \code{os_similarity()} returns a single numeric value representing the type +#' of similarity metric requested. 
'wavenumber' similarity is based on the +#' proportion of wavenumber values that overlap between the two objects; +#' 'metadata' is the proportion of metadata column names that overlap; +#' 'hamming' is similar to the Hamming distance: we discretize +#' all spectra in the OpenSpecy object by wavenumber intensity values and then +#' relate the wavenumber intensity value distributions by mean difference in +#' min-max normalized space. 'pca' tests the distance between the OpenSpecy +#' objects in PCA space using the first 4 component values and calculating the +#' max-range normalized distance between the mean components. The first two +#' metrics are straightforward and ready to use; the 'hamming' +#' and 'pca' metrics are experimental but appear to work under our +#' current test cases. #' #' @examples #' data("test_lib") @@ -392,6 +404,12 @@ os_similarity.OpenSpecy <- function(x, y, method = "hamming", na.rm = T, ...) { collapse = " ")), call. = F) + if(ncol(x$spectra) + ncol(y$spectra) < 8 & method == "pca") + stop("There must be at least 8 spectra total combined from the two Open Specy objects", + "to conduct the pca analysis. Consider using the hamming distance if you want a multispectra-metric", + "with fewer spectra.", + call. = F) + spec_y <- y$spectra[y$wavenumber %in% x$wavenumber, ] spec_y <- spec_y[, lapply(.SD, make_rel, na.rm = na.rm)] spec_y <- spec_y[, lapply(.SD, mean_replace)] @@ -401,6 +419,7 @@ os_similarity.OpenSpecy <- function(x, y, method = "hamming", na.rm = T, ...) { } if(method == "pca"){ + perform_combined_pca <- function(spec_obj1, spec_obj2) { # Extract intensities and transpose intensities1 <- t(spec_obj1) diff --git a/man/match_spec.Rd b/man/match_spec.Rd index e203b862..5c98993c 100644 --- a/man/match_spec.Rd +++ b/man/match_spec.Rd @@ -140,7 +140,19 @@ will be added to the output. \code{cor_spec()} returns a correlation matrix. 
\code{get_metadata()} returns a \code{\link[data.table]{data.table-class}()} with the metadata for columns which have information. -\code{os_similarity()} returns a single numeric value representing the type of similarity metric requested. +\code{os_similarity()} returns a single numeric value representing the type +of similarity metric requested. 'wavenumber' similarity is based on the +proportion of wavenumber values that overlap between the two objects; +'metadata' is the proportion of metadata column names that overlap; +'hamming' is similar to the Hamming distance: we discretize +all spectra in the OpenSpecy object by wavenumber intensity values and then +relate the wavenumber intensity value distributions by mean difference in +min-max normalized space. 'pca' tests the distance between the OpenSpecy +objects in PCA space using the first 4 component values and calculating the +max-range normalized distance between the mean components. The first two +metrics are straightforward and ready to use; the 'hamming' +and 'pca' metrics are experimental but appear to work under our +current test cases. } \description{ \code{match_spec()} joins two \code{OpenSpecy} objects and their metadata @@ -153,7 +165,7 @@ and formats them with metadata. \code{max_cor_named()} formats the top correlation values from a correlation matrix as a named vector. \code{filter_spec()} filters an Open Specy object. -\code{os_similarity()} EXPERIMENTAL, returns a single similarity metric between two OpenSpecy objects based on the method. +\code{os_similarity()} EXPERIMENTAL, returns a single similarity metric between two OpenSpecy objects based on the method used. 
} \examples{ data("test_lib") diff --git a/tests/testthat/test-match_spec.R b/tests/testthat/test-match_spec.R index 96877b9a..49221069 100644 --- a/tests/testthat/test-match_spec.R +++ b/tests/testthat/test-match_spec.R @@ -7,6 +7,8 @@ data("raman_hdpe") CA_test_lib <- filter_spec(test_lib, test_lib$metadata$SpectrumIdentity == "CA" ) +hdpe_test_lib <- filter_spec(test_lib, test_lib$metadata$sample_name == "0031bb13faea1e04b52ffbeca009e8ab") + tiny_map <- read_extdata("CA_tiny_map.zip") |> read_any() |> conform_spec(range = test_lib$wavenumber, @@ -34,8 +36,18 @@ test_that("os_similarity() returns correct values", { CA2 <- conform_spec(CA_test_lib, tiny_map$wavenumber, res = NULL, type = "roll") + hdpe2 <- conform_spec(hdpe_test_lib, tiny_map$wavenumber, res = NULL, type = "roll") + unknown2 <- conform_spec(unknown, tiny_map$wavenumber, res = NULL, type = "roll") + ramanhdpe2 <- conform_spec(raman_hdpe, tiny_map$wavenumber, res = NULL, type = "roll") |> + smooth_intens() + + #hamming still calculates a value for single spectra comparisons but probably not a great metric. + expect_true(os_similarity(ramanhdpe2, hdpe2) > os_similarity(ramanhdpe2,CA2)) |> expect_warning() |> expect_warning() + + os_similarity(ramanhdpe2, hdpe2, method = "pca") |> expect_error() |> expect_warning() + expect_true(os_similarity(test_lib2, test_lib2) > os_similarity(tiny_map, test_lib2)) expect_true(os_similarity(tiny_map, CA2) > os_similarity(tiny_map, unknown2)) |> expect_warning() expect_true(os_similarity(tiny_map, CA2, method = "pca") > os_similarity(x = tiny_map, y = unknown2, method = "pca")) |> expect_warning()