Skip to content

Commit

Permalink
update binary vignette
Browse files Browse the repository at this point in the history
  • Loading branch information
sdgamboa committed Dec 4, 2023
1 parent a3cdb2e commit d498e81
Showing 1 changed file with 140 additions and 0 deletions.
140 changes: 140 additions & 0 deletions vignettes/articles/uncertainty_binary_2.Rmd
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
---
title: "Uncertainty in tips for binary attributes with 'TRUE' and 'FALSE' annotations"
output:
html_document:
toc: true
---

```{r, include = FALSE}
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>"
)
```

```{r setup}
library(ape)
library(phytools)
library(dplyr)
library(tidyr)
```

A few attributes in bugphyzz only have 'TRUE' annotations.
Other attributes have both 'TRUE' and 'FALSE' annotations.

Tips that are uncertain could be treated as FALSE annotations (imputed data) or
uncertain annotations with prior probabilities set to 0.5 for TRUE and 0.5 for
FALSE. In any case, ASR is not very reliable with any of these approaches,
especially when the percentage of tips with annotations is low.

```{r}
data('primate.tree')
data('primate.data')
tree <- primate.tree
data <- primate.data
data <- data[tree$tip.label,]
# rownames(data) <- paste0('taxon', 1:nrow(data))
# tree$tip.label <- paste0('taxon', 1:Ntip(tree))
original <- data |>
tibble::rownames_to_column(var = 'Taxa') |>
select(Taxa, Activity_pattern) |>
mutate(Presence = 1) |>
pivot_wider(
names_from = 'Activity_pattern', values_from = 'Presence',
values_fill = 0
) |>
arrange(Taxa) |>
tibble::column_to_rownames(var = 'Taxa') |>
select(Diurnal) |>
mutate(
not_diurnal = ifelse(Diurnal == 0, 1, 0)
) |>
as.matrix()
colnames(original) <- c('A--TRUE', 'A--FALSE')
# myFun <- function(mat, uncertainty = 0.7, input_value = 0) {
# m <- mat
# if (input_value == 0.5) {
# m[] <- input_value
# } else if (input_value == 0) {
# m[,1] <- 0
# m[,2] <- 1
# }
# n <- round(nrow(mat) * (1 - uncertainty))
# a <- round(n / 3)
# b <- n - a
# nm1 <- sample(rownames(mat), b)
# nm2 <- sample(rownames(mat)[!rownames(mat) %in% nm1], a)
# m[nm1, 1] <- 1
# m[nm1, 2] <- 0
# m[nm2, 1] <- 0
# m[nm2, 2] <- 1
# return(m)
# }
head(sort(table(sub('^(\\w+)_.*$', '\\1', rownames(original))), decreasing = TRUE))
```


# Original

```{r}
fit <- fitMk(tree = tree, x = original,
model = "ARD", pi = "fitzjohn",
lik.func = "pruning", logscale = TRUE)
ace <- ancr(fit, tips=TRUE)
plot(ace, args.plotTree = list(direction="upwards"))
title(main = '0% uncertain tips', line = -1)
```


```{r}
m1 <- original
m1[which(grepl('^Macaca_', rownames(m1))), 1] <- 1
m1[which(!grepl('^Macaca_', rownames(m1))), 1] <- 0
m1[,2] <- ifelse(m1[,1] == 1, 0, 1)
fit1 <- fitMk(tree = tree, x = m1,
model = "ARD", pi = "fitzjohn",
lik.func = "pruning", logscale = TRUE)
ace1 <- ancr(fit1, tips=TRUE)
plot(ace1, args.plotTree = list(direction = "upwards"))
title(main = 'Macaca TRUE', line = -1, sub = '0/1 T/F for unknowns')
```

```{r}
m2 <- original
m2[] <- 0.5
m2[which(grepl('^Macaca_', rownames(m2))), 1] <- 1
m2[,2] <- ifelse(m2[,1] == 1, 0, 0.5)
fit2 <- fitMk(tree = tree, x = m2,
model = "ARD", pi = "fitzjohn",
lik.func = "pruning", logscale = TRUE)
ace2 <- ancr(fit2, tips=TRUE)
plot(ace2, args.plotTree = list(direction = "upwards"))
title(main = 'Macaca TRUE', line = -1, sub = '0.5 T/F for unknowns')
```


```{r}
m3 <- original
m3[] <- 0.5
m3[which(grepl('^Macaca_', rownames(m3))), 1] <- 1
m3[which(grepl('^Macaca_', rownames(m3))), 2] <- 0
m3[which(grepl('^Galago_', rownames(m3))), 2] <- 1
m3[which(grepl('^Galago', rownames(m3))), 1] <- 0
fit3 <- fitMk(tree = tree, x = m3,
model = "ARD", pi = "fitzjohn",
lik.func = "pruning", logscale = TRUE)
ace3 <- ancr(fit3, tips=TRUE)
plot(ace3, args.plotTree = list(direction = "upwards"))
title(main = 'Macaca TRUE - Galago FALSE', line = -1, sub = '0.5 T/F for unknowns')
```


# Session information

```{r}
sessioninfo::session_info()
```

0 comments on commit d498e81

Please sign in to comment.