-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
1 changed file
with
140 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,140 @@ | ||
--- | ||
title: "Uncertainty in tips for binary attributes with 'TRUE' and 'FALSE' annotations" | ||
output: | ||
html_document: | ||
toc: true | ||
--- | ||
|
||
```{r, include = FALSE} | ||
knitr::opts_chunk$set( | ||
collapse = TRUE, | ||
comment = "#>" | ||
) | ||
``` | ||
|
||
```{r setup} | ||
library(ape) | ||
library(phytools) | ||
library(dplyr) | ||
library(tidyr) | ||
``` | ||
|
||
A few attributes in bugphyzz only have 'TRUE' annotations. | ||
Other attributes have both 'TRUE' and 'FALSE' annotations. | ||
|
||
Tips that are uncertain could be treated as FALSE annotations (imputed data) or | ||
uncertain annotations with prior probabilities set to 0.5 for TRUE and 0.5 for | ||
FALSE. In any case, ASR is not very reliable with any of these approaches, | ||
especially when the percentage of tips with annotations is low. | ||
|
||
```{r} | ||
data('primate.tree') | ||
data('primate.data') | ||
tree <- primate.tree | ||
data <- primate.data | ||
data <- data[tree$tip.label,] | ||
# rownames(data) <- paste0('taxon', 1:nrow(data)) | ||
# tree$tip.label <- paste0('taxon', 1:Ntip(tree)) | ||
original <- data |> | ||
tibble::rownames_to_column(var = 'Taxa') |> | ||
select(Taxa, Activity_pattern) |> | ||
mutate(Presence = 1) |> | ||
pivot_wider( | ||
names_from = 'Activity_pattern', values_from = 'Presence', | ||
values_fill = 0 | ||
) |> | ||
arrange(Taxa) |> | ||
tibble::column_to_rownames(var = 'Taxa') |> | ||
select(Diurnal) |> | ||
mutate( | ||
not_diurnal = ifelse(Diurnal == 0, 1, 0) | ||
) |> | ||
as.matrix() | ||
colnames(original) <- c('A--TRUE', 'A--FALSE') | ||
# myFun <- function(mat, uncertainty = 0.7, input_value = 0) { | ||
# m <- mat | ||
# if (input_value == 0.5) { | ||
# m[] <- input_value | ||
# } else if (input_value == 0) { | ||
# m[,1] <- 0 | ||
# m[,2] <- 1 | ||
# } | ||
# n <- round(nrow(mat) * (1 - uncertainty)) | ||
# a <- round(n / 3) | ||
# b <- n - a | ||
# nm1 <- sample(rownames(mat), b) | ||
# nm2 <- sample(rownames(mat)[!rownames(mat) %in% nm1], a) | ||
# m[nm1, 1] <- 1 | ||
# m[nm1, 2] <- 0 | ||
# m[nm2, 1] <- 0 | ||
# m[nm2, 2] <- 1 | ||
# return(m) | ||
# } | ||
head(sort(table(sub('^(\\w+)_.*$', '\\1', rownames(original))), decreasing = TRUE)) | ||
``` | ||
|
||
|
||
# Original | ||
|
||
```{r} | ||
fit <- fitMk(tree = tree, x = original, | ||
model = "ARD", pi = "fitzjohn", | ||
lik.func = "pruning", logscale = TRUE) | ||
ace <- ancr(fit, tips=TRUE) | ||
plot(ace, args.plotTree = list(direction="upwards")) | ||
title(main = '0% uncertain tips', line = -1) | ||
``` | ||
|
||
|
||
```{r} | ||
m1 <- original | ||
m1[which(grepl('^Macaca_', rownames(m1))), 1] <- 1 | ||
m1[which(!grepl('^Macaca_', rownames(m1))), 1] <- 0 | ||
m1[,2] <- ifelse(m1[,1] == 1, 0, 1) | ||
fit1 <- fitMk(tree = tree, x = m1, | ||
model = "ARD", pi = "fitzjohn", | ||
lik.func = "pruning", logscale = TRUE) | ||
ace1 <- ancr(fit1, tips=TRUE) | ||
plot(ace1, args.plotTree = list(direction = "upwards")) | ||
title(main = 'Macaca TRUE', line = -1, sub = '0/1 T/F for unknowns') | ||
``` | ||
|
||
```{r} | ||
m2 <- original | ||
m2[] <- 0.5 | ||
m2[which(grepl('^Macaca_', rownames(m2))), 1] <- 1 | ||
m2[,2] <- ifelse(m2[,1] == 1, 0, 0.5) | ||
fit2 <- fitMk(tree = tree, x = m2, | ||
model = "ARD", pi = "fitzjohn", | ||
lik.func = "pruning", logscale = TRUE) | ||
ace2 <- ancr(fit2, tips=TRUE) | ||
plot(ace2, args.plotTree = list(direction = "upwards")) | ||
title(main = 'Macaca TRUE', line = -1, sub = '0.5 T/F for unknowns') | ||
``` | ||
|
||
|
||
```{r} | ||
m3 <- original | ||
m3[] <- 0.5 | ||
m3[which(grepl('^Macaca_', rownames(m3))), 1] <- 1 | ||
m3[which(grepl('^Macaca_', rownames(m3))), 2] <- 0 | ||
m3[which(grepl('^Galago_', rownames(m3))), 2] <- 1 | ||
m3[which(grepl('^Galago', rownames(m3))), 1] <- 0 | ||
fit3 <- fitMk(tree = tree, x = m3, | ||
model = "ARD", pi = "fitzjohn", | ||
lik.func = "pruning", logscale = TRUE) | ||
ace3 <- ancr(fit3, tips=TRUE) | ||
plot(ace3, args.plotTree = list(direction = "upwards")) | ||
title(main = 'Macaca TRUE - Galago FALSE', line = -1, sub = '0.5 T/F for unknowns') | ||
``` | ||
|
||
|
||
# Session information | ||
|
||
```{r} | ||
sessioninfo::session_info() | ||
``` | ||
|