Skip to content

Commit

Permalink
update vignette names
Browse files Browse the repository at this point in the history
  • Loading branch information
sdgamboa committed Dec 4, 2023
1 parent d498e81 commit 567f95a
Show file tree
Hide file tree
Showing 3 changed files with 53 additions and 268 deletions.
181 changes: 53 additions & 128 deletions vignettes/articles/uncertainty_binary.Rmd
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
---
title: "Uncertainty in tips for binary attributes when there are only 'TRUE' annotations"
title: "Uncertainty in tips for binary attributes with 'TRUE' and 'FALSE' annotations"
output:
html_document:
toc: true
Expand Down Expand Up @@ -33,8 +33,8 @@ data('primate.data')
tree <- primate.tree
data <- primate.data
data <- data[tree$tip.label,]
rownames(data) <- paste0('taxon', 1:nrow(data))
tree$tip.label <- paste0('taxon', 1:Ntip(tree))
# rownames(data) <- paste0('taxon', 1:nrow(data))
# tree$tip.label <- paste0('taxon', 1:Ntip(tree))
original <- data |>
tibble::rownames_to_column(var = 'Taxa') |>
select(Taxa, Activity_pattern) |>
Expand All @@ -53,16 +53,32 @@ original <- data |>
as.matrix()
colnames(original) <- c('A--TRUE', 'A--FALSE')
myFun <- function(mat, per_uncertain = 0.1) {
n_row <- nrow(mat)
n <- round(n_row * per_uncertain)
rows <- sample(x = 1:nrow(mat), size = n, replace = FALSE)
mat[rows,] <- rep(1/ncol(mat), ncol(mat))
mat
}
# myFun <- function(mat, uncertainty = 0.7, input_value = 0) {
# m <- mat
# if (input_value == 0.5) {
# m[] <- input_value
# } else if (input_value == 0) {
# m[,1] <- 0
# m[,2] <- 1
# }
# n <- round(nrow(mat) * (1 - uncertainty))
# a <- round(n / 3)
# b <- n - a
# nm1 <- sample(rownames(mat), b)
# nm2 <- sample(rownames(mat)[!rownames(mat) %in% nm1], a)
# m[nm1, 1] <- 1
# m[nm1, 2] <- 0
# m[nm2, 1] <- 0
# m[nm2, 2] <- 1
# return(m)
# }
head(sort(table(sub('^(\\w+)_.*$', '\\1', rownames(original))), decreasing = TRUE))
```

# No uncertainty

# Original

```{r}
fit <- fitMk(tree = tree, x = original,
model = "ARD", pi = "fitzjohn",
Expand All @@ -72,144 +88,53 @@ plot(ace, args.plotTree = list(direction="upwards"))
title(main = '0% uncertain tips', line = -1)
```

# Assigning 0.5 for TRUE and FALSE for annotations

This doesn't really work when all annotations are positive

## 99% uncertainty

In this case, a single annotation overrides all other annotations.
99% of tips are uncertain (0.5 TRUE and 0.5 FALSE).

```{r}
set.seed(1234)
m99 <- myFun(original, 0.99)
m99 <- m99[tree$tip.label,]
fit99 <- fitMk(tree = tree, x = m99,
m1 <- original
m1[which(grepl('^Macaca_', rownames(m1))), 1] <- 1
m1[which(!grepl('^Macaca_', rownames(m1))), 1] <- 0
m1[,2] <- ifelse(m1[,1] == 1, 0, 1)
fit1 <- fitMk(tree = tree, x = m1,
model = "ARD", pi = "fitzjohn",
lik.func = "pruning", logscale = TRUE)
ace99 <- ancr(fit99, tips=TRUE)
plot(ace99, args.plotTree = list(direction = "upwards"))
title(main = '99% uncertain tips', line = -1)
ace1 <- ancr(fit1, tips=TRUE)
plot(ace1, args.plotTree = list(direction = "upwards"))
title(main = 'Macaca TRUE', line = -1, sub = '0/1 T/F for unknowns')
```

## 95% uncertainty

```{r}
set.seed(1234) ## with this seed all values are TRUE that is what I left it here
m95 <- myFun(original, 0.95)
m95 <- m95[tree$tip.label,]
fit95 <- fitMk(tree = tree, x = m95,
m2 <- original
m2[] <- 0.5
m2[which(grepl('^Macaca_', rownames(m2))), 1] <- 1
m2[,2] <- ifelse(m2[,1] == 1, 0, 0.5)
fit2 <- fitMk(tree = tree, x = m2,
model = "ARD", pi = "fitzjohn",
lik.func = "pruning", logscale = TRUE)
ace95 <- ancr(fit95, tips=TRUE)
plot(ace95, args.plotTree = list(direction="upwards"))
title(main = '95% uncertain tips', line = -1)
ace2 <- ancr(fit2, tips=TRUE)
plot(ace2, args.plotTree = list(direction = "upwards"))
title(main = 'Macaca TRUE', line = -1, sub = '0.5 T/F for unknowns')
```


## 25% uncertainty

```{r}
# set.seed(1234)
# m75 <- myFun(original, 0.75)
# m75 <- m75[tree$tip.label,]
pos1 <- sample(1:nrow(original), round(nrow(original) * 0.75), replace = FALSE)
pos0.5 <- (1:nrow(original))[!1:nrow(original) %in% pos1]
m25 <- original
m25[] <- 0.5
m25[pos1, 1] <- 1
m25[pos1, 2] <- 0
fit25 <- fitMk(tree = tree, x = m25,
m3 <- original
m3[] <- 0.5
m3[which(grepl('^Macaca_', rownames(m3))), 1] <- 1
m3[which(grepl('^Macaca_', rownames(m3))), 2] <- 0
m3[which(grepl('^Galago_', rownames(m3))), 2] <- 1
m3[which(grepl('^Galago', rownames(m3))), 1] <- 0
fit3 <- fitMk(tree = tree, x = m3,
model = "ARD", pi = "fitzjohn",
lik.func = "pruning", logscale = TRUE)
m25 <- ancr(fit25, tips=TRUE)
plot(m25, args.plotTree = list(direction = "upwards"))
title(main = '25% uncertain tips', line = -1)
```



# Imputed zeros

```{r}
myFun2 <- function(vct, per_ones = 0.1) {
ones <- which(vct == 1)
zeros <- which(vct == 0)
perN <- round(per_ones * length(vct))
if (perN == length(ones)) {
new_vct <- vct
} else if (perN < length(ones)) {
keep_ones <- sample(ones, perN, replace = FALSE)
new_vct <- rep(0, length(vct))
new_vct[keep_ones] <- 1
} else if (perN > length(ones)) {
need_more <- perN - length(ones)
new_ones <- sample(zeros, need_more, replace = FALSE)
new_vct <- vct
new_vct[new_ones] <- 1
}
return(new_vct)
}
ace3 <- ancr(fit3, tips=TRUE)
plot(ace3, args.plotTree = list(direction = "upwards"))
title(main = 'Macaca TRUE - Galago FALSE', line = -1, sub = '0.5 T/F for unknowns')
```

## 95% imputed zeros

```{r}
x1 <- original
set.seed(12343)
x1[,1] <- myFun2(x1[,1], 0.05)
x1[,2] <- ifelse(x1[,1] == 1, 0, 1)
x1 <- x1[tree$tip.label,]
fit_x1 <- fitMk(tree = tree, x = x1,
model = "ARD", pi = "fitzjohn",
lik.func = "pruning", logscale = TRUE)
ace_x1 <- ancr(fit_x1, tips=TRUE)
plot(ace_x1, args.plotTree = list(direction="upwards"))
title(main = '95% imputed zeros A--FALSE', line = -1)
```

## 75% imputed zeros

```{r}
x2 <- original
set.seed(12343)
x2[,1] <- myFun2(x2[,1], 0.25)
x2[,2] <- ifelse(x2[,1] == 1, 0, 1)
x2 <- x2[tree$tip.label,]
fit_x2 <- fitMk(tree = tree, x = x2,
model = "ARD", pi = "fitzjohn",
lik.func = "pruning", logscale = TRUE)
ace_x2 <- ancr(fit_x2, tips=TRUE)
plot(ace_x2, args.plotTree = list(direction="upwards"))
title(main = '75% imputed zeros A--FALSE', line = -1)
```

## As currently implemented

Maybe just a few will have annotations above the threholds.
I don't think we should import FALSE values since they were all inputed.

```{r}
hist(
ace_x2$ace[Ntip(tree) + 1:Nnode(tree), 1],
main = 'Scores of internal nodes TRUE', xlab = 'Score'
)
```

# Session information

```{r}
sessioninfo::session_info()
```





140 changes: 0 additions & 140 deletions vignettes/articles/uncertainty_binary_2.Rmd

This file was deleted.

0 comments on commit 567f95a

Please sign in to comment.