From 567f95a790076a77fa99cbda79594287f5a8993d Mon Sep 17 00:00:00 2001 From: sdgamboa Date: Mon, 4 Dec 2023 13:39:24 -0500 Subject: [PATCH] update vignette names --- vignettes/articles/uncertainty_binary.Rmd | 181 +++++------------- vignettes/articles/uncertainty_binary_2.Rmd | 140 -------------- ...ty_tips.Rmd => uncertainty_multistate.Rmd} | 0 3 files changed, 53 insertions(+), 268 deletions(-) delete mode 100644 vignettes/articles/uncertainty_binary_2.Rmd rename vignettes/articles/{effect_uncertainty_tips.Rmd => uncertainty_multistate.Rmd} (100%) diff --git a/vignettes/articles/uncertainty_binary.Rmd b/vignettes/articles/uncertainty_binary.Rmd index a85d301..c36717c 100644 --- a/vignettes/articles/uncertainty_binary.Rmd +++ b/vignettes/articles/uncertainty_binary.Rmd @@ -1,5 +1,5 @@ --- -title: "Uncertainty in tips for binary attributes when there are only 'TRUE' annotations" +title: "Uncertainty in tips for binary attributes with 'TRUE' and 'FALSE' annotations" output: html_document: toc: true @@ -33,8 +33,8 @@ data('primate.data') tree <- primate.tree data <- primate.data data <- data[tree$tip.label,] -rownames(data) <- paste0('taxon', 1:nrow(data)) -tree$tip.label <- paste0('taxon', 1:Ntip(tree)) +# rownames(data) <- paste0('taxon', 1:nrow(data)) +# tree$tip.label <- paste0('taxon', 1:Ntip(tree)) original <- data |> tibble::rownames_to_column(var = 'Taxa') |> select(Taxa, Activity_pattern) |> @@ -53,16 +53,32 @@ original <- data |> as.matrix() colnames(original) <- c('A--TRUE', 'A--FALSE') -myFun <- function(mat, per_uncertain = 0.1) { - n_row <- nrow(mat) - n <- round(n_row * per_uncertain) - rows <- sample(x = 1:nrow(mat), size = n, replace = FALSE) - mat[rows,] <- rep(1/ncol(mat), ncol(mat)) - mat -} + +# myFun <- function(mat, uncertainty = 0.7, input_value = 0) { +# m <- mat +# if (input_value == 0.5) { +# m[] <- input_value +# } else if (input_value == 0) { +# m[,1] <- 0 +# m[,2] <- 1 +# } +# n <- round(nrow(mat) * (1 - uncertainty)) +# a <- round(n / 3) +# b <- n - a +# nm1 <- sample(rownames(mat), b) +# nm2 <- sample(rownames(mat)[!rownames(mat) %in% nm1], a) +# m[nm1, 1] <- 1 +# m[nm1, 2] <- 0 +# m[nm2, 1] <- 0 +# m[nm2, 2] <- 1 +# return(m) +# } +head(sort(table(sub('^(\\w+)_.*$', '\\1', rownames(original))), decreasing = TRUE)) ``` -# No uncertainty + +# Original + ```{r} fit <- fitMk(tree = tree, x = original, model = "ARD", pi = "fitzjohn", @@ -72,136 +88,49 @@ plot(ace, args.plotTree = list(direction="upwards")) title(main = '0% uncertain tips', line = -1) ``` -# Assigning 0.5 for TRUE and FALSE for annotations - -This doesn't really work when all annotations are positive - -## 99% uncertainty - -In this case, a single annotation overrides all other annotations. -99% of tips are uncertain (0.5 TRUE and 0.5 FALSE). ```{r} -set.seed(1234) -m99 <- myFun(original, 0.99) -m99 <- m99[tree$tip.label,] -fit99 <- fitMk(tree = tree, x = m99, +m1 <- original +m1[which(grepl('^Macaca_', rownames(m1))), 1] <- 1 +m1[which(!grepl('^Macaca_', rownames(m1))), 1] <- 0 +m1[,2] <- ifelse(m1[,1] == 1, 0, 1) +fit1 <- fitMk(tree = tree, x = m1, model = "ARD", pi = "fitzjohn", lik.func = "pruning", logscale = TRUE) -ace99 <- ancr(fit99, tips=TRUE) -plot(ace99, args.plotTree = list(direction = "upwards")) -title(main = '99% uncertain tips', line = -1) +ace1 <- ancr(fit1, tips=TRUE) +plot(ace1, args.plotTree = list(direction = "upwards")) +title(main = 'Macaca TRUE', line = -1, sub = '0/1 T/F for unknowns') ``` -## 95% uncertainty - ```{r} -set.seed(1234) ## with this seed all values are TRUE that is what I left it here -m95 <- myFun(original, 0.95) -m95 <- m95[tree$tip.label,] -fit95 <- fitMk(tree = tree, x = m95, +m2 <- original +m2[] <- 0.5 +m2[which(grepl('^Macaca_', rownames(m2))), 1] <- 1 +m2[,2] <- ifelse(m2[,1] == 1, 0, 0.5) +fit2 <- fitMk(tree = tree, x = m2, model = "ARD", pi = "fitzjohn", lik.func = "pruning", logscale = TRUE) -ace95 <- ancr(fit95, tips=TRUE) -plot(ace95, args.plotTree = list(direction="upwards")) -title(main = '95% uncertain tips', line = -1) - +ace2 <- ancr(fit2, tips=TRUE) +plot(ace2, args.plotTree = list(direction = "upwards")) +title(main = 'Macaca TRUE', line = -1, sub = '0.5 T/F for unknowns') ``` -## 25% uncertainty - ```{r} -# set.seed(1234) -# m75 <- myFun(original, 0.75) -# m75 <- m75[tree$tip.label,] -pos1 <- sample(1:nrow(original), round(nrow(original) * 0.75), replace = FALSE) -pos0.5 <- (1:nrow(original))[!1:nrow(original) %in% pos1] -m25 <- original -m25[] <- 0.5 -m25[pos1, 1] <- 1 -m25[pos1, 2] <- 0 -fit25 <- fitMk(tree = tree, x = m25, +m3 <- original +m3[] <- 0.5 +m3[which(grepl('^Macaca_', rownames(m3))), 1] <- 1 +m3[which(grepl('^Macaca_', rownames(m3))), 2] <- 0 +m3[which(grepl('^Galago_', rownames(m3))), 2] <- 1 +m3[which(grepl('^Galago', rownames(m3))), 1] <- 0 +fit3 <- fitMk(tree = tree, x = m3, model = "ARD", pi = "fitzjohn", lik.func = "pruning", logscale = TRUE) -m25 <- ancr(fit25, tips=TRUE) -plot(m25, args.plotTree = list(direction = "upwards")) -title(main = '25% uncertain tips', line = -1) -``` - - - -# Imputed zeros - -```{r} -myFun2 <- function(vct, per_ones = 0.1) { - ones <- which(vct == 1) - zeros <- which(vct == 0) - perN <- round(per_ones * length(vct)) - - if (perN == length(ones)) { - new_vct <- vct - } else if (perN < length(ones)) { - keep_ones <- sample(ones, perN, replace = FALSE) - new_vct <- rep(0, length(vct)) - new_vct[keep_ones] <- 1 - } else if (perN > length(ones)) { - need_more <- perN - length(ones) - new_ones <- sample(zeros, need_more, replace = FALSE) - new_vct <- vct - new_vct[new_ones] <- 1 - } - return(new_vct) -} - +ace3 <- ancr(fit3, tips=TRUE) +plot(ace3, args.plotTree = list(direction = "upwards")) +title(main = 'Macaca TRUE - Galago FALSE', line = -1, sub = '0.5 T/F for unknowns') ``` -## 95% imputed zeros - -```{r} -x1 <- original -set.seed(12343) -x1[,1] <- myFun2(x1[,1], 0.05) -x1[,2] <- ifelse(x1[,1] == 1, 0, 1) - -x1 <- x1[tree$tip.label,] -fit_x1 <- fitMk(tree = tree, x = x1, - model = "ARD", pi = "fitzjohn", - lik.func = "pruning", logscale = TRUE) -ace_x1 <- ancr(fit_x1, tips=TRUE) -plot(ace_x1, args.plotTree = list(direction="upwards")) -title(main = '95% imputed zeros A--FALSE', line = -1) -``` - -## 75% imputed zeros - -```{r} -x2 <- original -set.seed(12343) -x2[,1] <- myFun2(x2[,1], 0.25) -x2[,2] <- ifelse(x2[,1] == 1, 0, 1) - -x2 <- x2[tree$tip.label,] -fit_x2 <- fitMk(tree = tree, x = x2, - model = "ARD", pi = "fitzjohn", - lik.func = "pruning", logscale = TRUE) -ace_x2 <- ancr(fit_x2, tips=TRUE) -plot(ace_x2, args.plotTree = list(direction="upwards")) -title(main = '75% imputed zeros A--FALSE', line = -1) -``` - -## As currently implemented - -Maybe just a few will have annotations above the threholds. -I don't think we should import FALSE values since they were all inputed. - -```{r} -hist( - ace_x2$ace[Ntip(tree) + 1:Nnode(tree), 1], - main = 'Scores of internal nodes TRUE', xlab = 'Score' - -) -``` # Session information @@ -209,7 +138,3 @@ hist( sessioninfo::session_info() ``` - - - - diff --git a/vignettes/articles/uncertainty_binary_2.Rmd b/vignettes/articles/uncertainty_binary_2.Rmd deleted file mode 100644 index c36717c..0000000 --- a/vignettes/articles/uncertainty_binary_2.Rmd +++ /dev/null @@ -1,140 +0,0 @@ ---- -title: "Uncertainty in tips for binary attributes with 'TRUE' and 'FALSE' annotations" -output: - html_document: - toc: true ---- - -```{r, include = FALSE} -knitr::opts_chunk$set( - collapse = TRUE, - comment = "#>" -) -``` - -```{r setup} -library(ape) -library(phytools) -library(dplyr) -library(tidyr) -``` - -A few attributes in bugphyzz only have 'TRUE' annotations. -Other attributes have both 'TRUE' and 'FALSE' annotations. - -Tips that are uncertain could be treated as FALSE annotations (imputed data) or -uncertain annotations with prior probabilities set to 0.5 for TRUE and 0.5 for -FALSE. In any case, ASR is not very reliable with any of these approaches, -especially when the percentage of tips with annotations is low. - -```{r} -data('primate.tree') -data('primate.data') -tree <- primate.tree -data <- primate.data -data <- data[tree$tip.label,] -# rownames(data) <- paste0('taxon', 1:nrow(data)) -# tree$tip.label <- paste0('taxon', 1:Ntip(tree)) -original <- data |> - tibble::rownames_to_column(var = 'Taxa') |> - select(Taxa, Activity_pattern) |> - mutate(Presence = 1) |> - pivot_wider( - names_from = 'Activity_pattern', values_from = 'Presence', - - values_fill = 0 - ) |> - arrange(Taxa) |> - tibble::column_to_rownames(var = 'Taxa') |> - select(Diurnal) |> - mutate( - not_diurnal = ifelse(Diurnal == 0, 1, 0) - ) |> - as.matrix() -colnames(original) <- c('A--TRUE', 'A--FALSE') - - -# myFun <- function(mat, uncertainty = 0.7, input_value = 0) { -# m <- mat -# if (input_value == 0.5) { -# m[] <- input_value -# } else if (input_value == 0) { -# m[,1] <- 0 -# m[,2] <- 1 -# } -# n <- round(nrow(mat) * (1 - uncertainty)) -# a <- round(n / 3) -# b <- n - a -# nm1 <- sample(rownames(mat), b) -# nm2 <- sample(rownames(mat)[!rownames(mat) %in% nm1], a) -# m[nm1, 1] <- 1 -# m[nm1, 2] <- 0 -# m[nm2, 1] <- 0 -# m[nm2, 2] <- 1 -# return(m) -# } -head(sort(table(sub('^(\\w+)_.*$', '\\1', rownames(original))), decreasing = TRUE)) -``` - - -# Original - -```{r} -fit <- fitMk(tree = tree, x = original, - model = "ARD", pi = "fitzjohn", - lik.func = "pruning", logscale = TRUE) -ace <- ancr(fit, tips=TRUE) -plot(ace, args.plotTree = list(direction="upwards")) -title(main = '0% uncertain tips', line = -1) -``` - - -```{r} -m1 <- original -m1[which(grepl('^Macaca_', rownames(m1))), 1] <- 1 -m1[which(!grepl('^Macaca_', rownames(m1))), 1] <- 0 -m1[,2] <- ifelse(m1[,1] == 1, 0, 1) -fit1 <- fitMk(tree = tree, x = m1, - model = "ARD", pi = "fitzjohn", - lik.func = "pruning", logscale = TRUE) -ace1 <- ancr(fit1, tips=TRUE) -plot(ace1, args.plotTree = list(direction = "upwards")) -title(main = 'Macaca TRUE', line = -1, sub = '0/1 T/F for unknowns') -``` - -```{r} -m2 <- original -m2[] <- 0.5 -m2[which(grepl('^Macaca_', rownames(m2))), 1] <- 1 -m2[,2] <- ifelse(m2[,1] == 1, 0, 0.5) -fit2 <- fitMk(tree = tree, x = m2, - model = "ARD", pi = "fitzjohn", - lik.func = "pruning", logscale = TRUE) -ace2 <- ancr(fit2, tips=TRUE) -plot(ace2, args.plotTree = list(direction = "upwards")) -title(main = 'Macaca TRUE', line = -1, sub = '0.5 T/F for unknowns') -``` - - -```{r} -m3 <- original -m3[] <- 0.5 -m3[which(grepl('^Macaca_', rownames(m3))), 1] <- 1 -m3[which(grepl('^Macaca_', rownames(m3))), 2] <- 0 -m3[which(grepl('^Galago_', rownames(m3))), 2] <- 1 -m3[which(grepl('^Galago', rownames(m3))), 1] <- 0 -fit3 <- fitMk(tree = tree, x = m3, - model = "ARD", pi = "fitzjohn", - lik.func = "pruning", logscale = TRUE) -ace3 <- ancr(fit3, tips=TRUE) -plot(ace3, args.plotTree = list(direction = "upwards")) -title(main = 'Macaca TRUE - Galago FALSE', line = -1, sub = '0.5 T/F for unknowns') -``` - - -# Session information - -```{r} -sessioninfo::session_info() -``` - diff --git a/vignettes/articles/effect_uncertainty_tips.Rmd b/vignettes/articles/uncertainty_multistate.Rmd similarity index 100% rename from vignettes/articles/effect_uncertainty_tips.Rmd rename to vignettes/articles/uncertainty_multistate.Rmd