Skip to content

Commit

Permalink
add similarity and now tests are working.
Browse files Browse the repository at this point in the history
  • Loading branch information
wincowgerDEV committed Dec 8, 2023
1 parent 755a49e commit 2c3cf9e
Show file tree
Hide file tree
Showing 4 changed files with 120 additions and 44 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,7 @@ importFrom(stats,lm)
importFrom(stats,median)
importFrom(stats,model.frame)
importFrom(stats,poly)
importFrom(stats,prcomp)
importFrom(stats,predict)
importFrom(stats,sd)
importFrom(stats,setNames)
Expand Down
105 changes: 100 additions & 5 deletions R/match_spec.R
Original file line number Diff line number Diff line change
Expand Up @@ -79,9 +79,9 @@
#' \code{\link{load_lib}()} loads the Open Specy reference library into an \R
#' object of choice
#'
#' @importFrom stats cor predict
#' @importFrom stats cor predict prcomp
#' @importFrom glmnet predict.glmnet
#' @importFrom data.table data.table setorder fifelse .SD as.data.table rbindlist
#' @importFrom data.table data.table setorder fifelse .SD as.data.table rbindlist transpose
#' @export
cor_spec <- function(x, ...) {
UseMethod("cor_spec")
Expand Down Expand Up @@ -371,8 +371,103 @@ os_similarity.default <- function(x, ...) {
#' @rdname match_spec
#'
#' @export
os_similarity.OpenSpecy <- function(x, y, method = "cor_spec", ...) {
if(method == "cor_spec"){
mean(cor_spec(x, y, ...))
os_similarity.OpenSpecy <- function(x, y, method = "hamming", na.rm = T, ...) {
if(method == "wavenumber"){
series = c(x$wavenumber, y$wavenumber)
return(sum(duplicated(series))/length(unique(series)))
}
if(method %in% c("pca", "hamming")){
if(sum(x$wavenumber %in% y$wavenumber) < 3)
stop("there are less than 3 matching wavenumbers in the objects you are ",
"trying to correlate; this won't work for correlation analysis. ",
"Consider first conforming the spectra to the same wavenumbers.",
call. = F)

series = c(x$wavenumber, y$wavenumber)

if(sum(duplicated(series))/length(unique(series)) != 1)
warning(paste0("some wavenumbers in 'x' are not in the 'y' and the ",
"function is not using these in the identification routine: ",
paste(unique(c(x$wavenumber[!x$wavenumber %in% y$wavenumber], y$wavenumber[!y$wavenumber %in% x$wavenumber])),
collapse = " ")),
call. = F)

spec_y <- y$spectra[y$wavenumber %in% x$wavenumber, ]
spec_y <- spec_y[, lapply(.SD, make_rel, na.rm = na.rm)]
spec_y <- spec_y[, lapply(.SD, mean_replace)]
spec_x <- x$spectra[x$wavenumber %in% y$wavenumber,]
spec_x <- spec_x[,lapply(.SD, make_rel, na.rm = na.rm)]
spec_x <- spec_x[, lapply(.SD, mean_replace)]

}
if(method == "pca"){
perform_combined_pca <- function(spec_obj1, spec_obj2) {
# Extract intensities and transpose
intensities1 <- t(spec_obj1)
intensities2 <- t(spec_obj2)

# Combine the datasets
combined_intensities <- rbind(intensities1, intensities2)

# Perform PCA
pca_result <- prcomp(combined_intensities, scale. = TRUE)

# Determine the index range for each dataset
index_spec_obj1 <- 1:nrow(intensities1)
index_spec_obj2 <- (nrow(intensities1) + 1):(nrow(intensities1) + nrow(intensities2))

# Extract PCA results for each dataset
pca_spec_obj1 <- pca_result$x[index_spec_obj1, 1:4]
pca_spec_obj2 <- pca_result$x[index_spec_obj2, 1:4]

# Calculate central locations

pca_range <- apply(pca_result$x[,1:4], 2, function(column) abs(max(column) - min(column)))

if(is.null(dim(pca_spec_obj1))){
central_loc1 <- pca_spec_obj1
}
else{
central_loc1 <- colMeans(pca_spec_obj1)
}
if(is.null(dim(pca_spec_obj2))){
central_loc2 <- pca_spec_obj2
}
else{
central_loc2 <- colMeans(pca_spec_obj2)
}

return(list(central_loc1, central_loc2, pca_range))
}

central_locs <- perform_combined_pca(spec_obj1 = spec_x, spec_obj2 = spec_y)
return(
1-mean(abs(central_locs[[1]] - central_locs[[2]])/central_locs[[3]])
)
}
if(method == "hamming"){
spec_y <- transpose(spec_y)
spec_y <- spec_y[,lapply(.SD, function(x){
values <- make_rel(table(round(x,1)))
sequence <- seq(0, 1, by = 0.1)
empty <- numeric(length = length(sequence))
empty[match(names(values), seq(0, 1, by = 0.1))] <- values
ifelse(is.nan(empty), 1, empty)
})]

spec_x <- transpose(spec_x)
spec_x <- spec_x[,lapply(.SD, function(x){
values <- make_rel(table(round(x,1)))
sequence <- seq(0, 1, by = 0.1)
empty <- numeric(length = length(sequence))
empty[match(names(values), seq(0, 1, by = 0.1))] <- values
ifelse(is.nan(empty), 1, empty)
})]

return(1 - unlist(abs(spec_x - spec_y)) |> mean(na.rm = T))
}
if(method == "metadata"){
series = c(names(x$metadata), names(y$metadata))
return(sum(duplicated(series))/length(unique(series)))
}
}
2 changes: 1 addition & 1 deletion man/match_spec.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

56 changes: 18 additions & 38 deletions tests/testthat/test-match_spec.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,48 +18,28 @@ preproc <- conform_spec(unknown, range = test_lib$wavenumber,
res = spec_res(test_lib)) |>
process_spec(smooth_intens = T, make_rel = T)

test_that("os_similarity() handles input errors correctly", {
expect_true(os_similarity(tiny_map, test_lib) < os_similarity(tiny_map, tiny_map))
expect_true(os_similarity(test_lib, test_lib) > os_similarity(tiny_map, test_lib))
cor_spec(test_lib, tiny_map) |>
median()
test_that("os_similarity() returns correct values", {
#The basic definition of similarity for each
expect_true(os_similarity(tiny_map, tiny_map, method = "hamming") == 1)
expect_true(os_similarity(tiny_map, tiny_map, method = "pca") == 1)
expect_true(os_similarity(tiny_map, tiny_map, method = "metadata") == 1)
expect_true(os_similarity(tiny_map, tiny_map, method = "wavenumber") == 1)

#Wavenumbers
expect_identical(os_similarity(x = tiny_map, y = test_lib, method = "wavenumber") |> round(2), 0.84)
expect_identical(os_similarity(x = tiny_map, y = tiny_map, method = "wavenumber") |> round(2), 1)
expect_true(os_similarity(x = tiny_map, y = test_lib, method = "wavenumber") > os_similarity(x = unknown, y = test_lib, method = "wavenumber"))

test_lib2 <- conform_spec(test_lib, tiny_map$wavenumber, res = NULL, type = "roll")
spectra <- transpose(test_lib2$spectra)
spectra2 <- spectra[,lapply(.SD, function(x){
values <- make_rel(table(round(x,1)))
sequence <- seq(0, 1, by = 0.1)
empty <- numeric(length = length(sequence))
empty[match(names(values), seq(0, 1, by = 0.1))] <- values
empty
})]
CA2 <- conform_spec(CA_test_lib, tiny_map$wavenumber, res = NULL, type = "roll")
CAspectra <- transpose(CA2$spectra)
CAspectra2 <- CAspectra[,lapply(.SD, function(x){
values <- make_rel(table(round(x,1)))
sequence <- seq(0, 1, by = 0.1)
empty <- numeric(length = length(sequence))
empty[match(names(values), seq(0, 1, by = 0.1))] <- values
ifelse(is.nan(empty), 1, empty)
})]
unspectra <- transpose(tiny_map$spectra)
unspectra2 <- unspectra[,lapply(.SD, function(x){
values <- make_rel(table(round(x,1)))
sequence <- seq(0, 1, by = 0.1)
empty <- numeric(length = length(sequence))
empty[match(names(values), seq(0, 1, by = 0.1))] <- values
empty
})]
distance1 <- unlist(abs(unspectra2 - unspectra2)) |> mean(na.rm = T)
distance2 <- unlist(abs(unspectra2 - spectra2)) |> mean(na.rm = T)
distance3 <- unlist(abs(CAspectra2 - unspectra2)) |> mean(na.rm = T)
test <- abs(CAspectra2 - unspectra2)

ggplot() +
geom_line(aes(x = seq(0,1, by = 0.1), y = unspectra2[[30]]))
CA2 <- conform_spec(CA_test_lib, tiny_map$wavenumber, res = NULL, type = "roll")

os_similarity(raman_hdpe, raman_hdpe) |>
expect_equal(1)
unknown2 <- conform_spec(unknown, tiny_map$wavenumber, res = NULL, type = "roll")

expect_true(os_similarity(test_lib2, test_lib2) > os_similarity(tiny_map, test_lib2))
expect_true(os_similarity(tiny_map, CA2) > os_similarity(tiny_map, unknown2)) |> expect_warning()
expect_true(os_similarity(tiny_map, CA2, method = "pca") > os_similarity(x = tiny_map, y = unknown2, method = "pca")) |> expect_warning()
expect_true(os_similarity(tiny_map, raman_hdpe, method = "metadata") == 0.25)
})

test_that("ai_classify() handles input errors correctly", {
Expand Down

0 comments on commit 2c3cf9e

Please sign in to comment.