update binary vignette

waldronlab · Dec 4, 2023 · d498e81 · d498e81
1 parent a3cdb2e
commit d498e81
Showing 1 changed file with 140 additions and 0 deletions.
diff --git a/vignettes/articles/uncertainty_binary_2.Rmd b/vignettes/articles/uncertainty_binary_2.Rmd
@@ -0,0 +1,140 @@
+---
+title: "Uncertainty in tips for binary attributes with 'TRUE' and 'FALSE' annotations"
+output:
+  html_document:
+    toc: true
+---
+
+```{r, include = FALSE}
+knitr::opts_chunk$set(
+  collapse = TRUE,
+  comment = "#>"
+)
+```
+
+```{r setup}
+library(ape)
+library(phytools)
+library(dplyr)
+library(tidyr)
+```
+
+A few attributes in bugphyzz only have 'TRUE' annotations.
+Other attributes have both 'TRUE' and 'FALSE' annotations.
+
+Tips that are uncertain could be treated as FALSE annotations (imputed data) or 
+uncertain annotations with prior probabilities set to 0.5 for TRUE and 0.5 for
+FALSE. In any case, ASR is not very reliable with any of these approaches,
+especially when the percentage of tips with annotations is low.
+
+```{r}
+data('primate.tree')
+data('primate.data')
+tree <- primate.tree
+data <- primate.data
+data <- data[tree$tip.label,]
+# rownames(data) <- paste0('taxon', 1:nrow(data))
+# tree$tip.label <- paste0('taxon', 1:Ntip(tree))
+original <- data |>
+    tibble::rownames_to_column(var = 'Taxa') |>
+    select(Taxa, Activity_pattern) |>
+    mutate(Presence = 1) |>
+    pivot_wider(
+        names_from = 'Activity_pattern', values_from = 'Presence',
+
+                values_fill = 0
+    ) |>
+    arrange(Taxa) |>
+    tibble::column_to_rownames(var = 'Taxa') |>
+    select(Diurnal) |> 
+    mutate(
+        not_diurnal = ifelse(Diurnal == 0, 1, 0)
+    ) |> 
+    as.matrix()
+colnames(original) <- c('A--TRUE', 'A--FALSE')
+
+
+# myFun <- function(mat, uncertainty = 0.7, input_value = 0) {
+#     m <- mat
+#     if (input_value == 0.5) {
+#         m[] <- input_value
+#     } else if (input_value == 0) {
+#         m[,1] <- 0
+#         m[,2] <- 1
+#     }
+#     n <- round(nrow(mat) * (1 - uncertainty))
+#     a <- round(n / 3)
+#     b <- n - a
+#     nm1 <- sample(rownames(mat), b)
+#     nm2 <- sample(rownames(mat)[!rownames(mat) %in% nm1], a)
+#     m[nm1, 1] <- 1
+#     m[nm1, 2] <- 0
+#     m[nm2, 1] <- 0
+#     m[nm2, 2] <- 1
+#     return(m)
+# }
+head(sort(table(sub('^(\\w+)_.*$', '\\1', rownames(original))), decreasing = TRUE))
+```
+
+
+# Original
+
+```{r}
+fit <- fitMk(tree = tree, x = original,
+              model = "ARD", pi = "fitzjohn",
+              lik.func = "pruning", logscale = TRUE)
+ace <- ancr(fit, tips=TRUE)
+plot(ace, args.plotTree = list(direction="upwards"))
+title(main = '0% uncertain tips', line = -1)
+```
+
+
+```{r}
+m1 <- original
+m1[which(grepl('^Macaca_', rownames(m1))), 1] <- 1
+m1[which(!grepl('^Macaca_', rownames(m1))), 1] <- 0
+m1[,2] <- ifelse(m1[,1] == 1, 0, 1)
+fit1 <- fitMk(tree = tree, x = m1,
+              model = "ARD", pi = "fitzjohn",
+              lik.func = "pruning", logscale = TRUE)
+ace1 <- ancr(fit1, tips=TRUE)
+plot(ace1, args.plotTree = list(direction = "upwards"))
+title(main = 'Macaca TRUE', line = -1, sub = '0/1 T/F for unknowns')
+```
+
+```{r}
+m2 <- original
+m2[] <- 0.5
+m2[which(grepl('^Macaca_', rownames(m2))), 1] <- 1
+m2[,2] <- ifelse(m2[,1] == 1, 0, 0.5)
+fit2 <- fitMk(tree = tree, x = m2,
+              model = "ARD", pi = "fitzjohn",
+              lik.func = "pruning", logscale = TRUE)
+ace2 <- ancr(fit2, tips=TRUE)
+plot(ace2, args.plotTree = list(direction = "upwards"))
+title(main = 'Macaca TRUE', line = -1, sub = '0.5 T/F for unknowns')
+```
+
+
+```{r}
+m3 <- original
+m3[] <- 0.5
+m3[which(grepl('^Macaca_', rownames(m3))), 1] <- 1
+m3[which(grepl('^Macaca_', rownames(m3))), 2] <- 0
+m3[which(grepl('^Galago_', rownames(m3))), 2] <- 1
+m3[which(grepl('^Galago', rownames(m3))), 1] <- 0
+fit3 <- fitMk(tree = tree, x = m3,
+              model = "ARD", pi = "fitzjohn",
+              lik.func = "pruning", logscale = TRUE)
+ace3 <- ancr(fit3, tips=TRUE)
+plot(ace3, args.plotTree = list(direction = "upwards"))
+title(main = 'Macaca TRUE - Galago FALSE', line = -1, sub = '0.5 T/F for unknowns')
+```
+
+
+# Session information
+
+```{r}
+sessioninfo::session_info()
+```
+