Skip to content

Commit

Permalink
update phytools2 script
Browse files Browse the repository at this point in the history
  • Loading branch information
sdgamboa committed Oct 10, 2023
1 parent 214acd7 commit f11e25c
Showing 1 changed file with 102 additions and 107 deletions.
209 changes: 102 additions & 107 deletions inst/scripts/phytools2.R
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,7 @@ for (i in seq_along(phys_data_ready)) {
)
asr <- ancr(object = fit, tips = TRUE)
})
print(tim)

res <- asr$ace
node_rows <- length(pruned_tree$tip.label) + 1:pruned_tree$Nnode
Expand Down Expand Up @@ -263,24 +264,26 @@ for (i in seq_along(phys_data_ready)) {

## Perform taxonomic pooling and inheritance (round 2)
message('Performing second taxPool and Inh pooling for', current_phys)
ncbi_tree$Do(function(node) {
cond1 <- node$name %in% names(new_taxa_for_ncbi_tree_list)
cond2 <- is.null(node$attribute_tbl) || all(is.na(node$attribute_tbl))
if (cond1 && cond2) {
node$attribute_tbl <- new_taxa_for_ncbi_tree_list[[node$name]]
}
tim <- system.time({
ncbi_tree$Do(function(node) {
cond1 <- node$name %in% names(new_taxa_for_ncbi_tree_list)
cond2 <- is.null(node$attribute_tbl) || all(is.na(node$attribute_tbl))
if (cond1 && cond2) {
node$attribute_tbl <- new_taxa_for_ncbi_tree_list[[node$name]]
}
})
ncbi_tree$Do(
function(node) {
taxPool(
node = node,
grp = Attribute_group_var,
typ = Attribute_type_var
)
},
traversal = 'post-order'
)
ncbi_tree$Do(inh2, traversal = 'pre-order')
})
ncbi_tree$Do(
function(node) {
taxPool(
node = node,
grp = Attribute_group_var,
typ = Attribute_type_var
)
},
traversal = 'post-order'
)
ncbi_tree$Do(inh2, traversal = 'pre-order')

## Extracting files
result <- ncbi_tree$Get(
Expand Down Expand Up @@ -320,52 +323,50 @@ print(elapsed_time)



ncbi_tree$Do(function(node) {
if (node$name %in% names(phys_data_list)) {
node$attribute_tbl <- phys_data_list[[node$name]]
} else {
node$attribute_tbl <- NULL
}
})

# ncbi_tree$Do(function(node) {
# if (node$name %in% names(phys_data_list)) {
# node$attribute_tbl <- phys_data_list[[node$name]]
# } else {
# node$attribute_tbl <- NULL
# }
# })

## ------------------------------------------------------------------------------------------------------------------------
Attribute_group_var <- unique(phys_data_ready$Attribute_group)
Attribute_group_var <- Attribute_group_var[!is.na(Attribute_group_var)]
Attribute_type_var <- unique(phys_data_ready$Attribute_type)
Attribute_type_var <- Attribute_type_var[!is.na(Attribute_type_var)]
ncbi_tree$Do(
function(node) taxPool(node = node, grp = Attribute_group_var, typ = Attribute_type_var),
traversal = 'post-order'
)
ncbi_tree$Do(inh1, traversal = 'pre-order')


## ------------------------------------------------------------------------------------------------------------------------
new_phys_data <- ncbi_tree$Get(
'attribute_tbl', filterFun = function(node) grepl('^[gst]__', node$name)
) |>
discard(~ all(is.na(.x))) |>
bind_rows() |>
arrange(NCBI_ID, Attribute) |>
filter(!NCBI_ID %in% phys_data_ready$NCBI_ID) |>
mutate(taxid = sub('^\\w__', '', NCBI_ID)) |>
bind_rows(phys_data_ready)

tip_data_annotated <- left_join(
tip_data,
select(new_phys_data, taxid, Attribute, Score),
by = 'taxid'
)

annotated_tips <- tip_data_annotated |>
select(tip_label, Attribute, Score) |>
filter(!is.na(Attribute)) |>
pivot_wider(
names_from = 'Attribute', values_from = 'Score', values_fill = 0
) |>
tibble::column_to_rownames(var = 'tip_label') |>
as.matrix()
# Attribute_group_var <- unique(phys_data_ready$Attribute_group)
# Attribute_group_var <- Attribute_group_var[!is.na(Attribute_group_var)]
# Attribute_type_var <- unique(phys_data_ready$Attribute_type)
# Attribute_type_var <- Attribute_type_var[!is.na(Attribute_type_var)]
# ncbi_tree$Do(
# function(node) taxPool(node = node, grp = Attribute_group_var, typ = Attribute_type_var),
# traversal = 'post-order'
# )
# ncbi_tree$Do(inh1, traversal = 'pre-order')


# new_phys_data <- ncbi_tree$Get(
# 'attribute_tbl', filterFun = function(node) grepl('^[gst]__', node$name)
# ) |>
# discard(~ all(is.na(.x))) |>
# bind_rows() |>
# arrange(NCBI_ID, Attribute) |>
# filter(!NCBI_ID %in% phys_data_ready$NCBI_ID) |>
# mutate(taxid = sub('^\\w__', '', NCBI_ID)) |>
# bind_rows(phys_data_ready)
#
# tip_data_annotated <- left_join(
# tip_data,
# select(new_phys_data, taxid, Attribute, Score),
# by = 'taxid'
# )
#
# annotated_tips <- tip_data_annotated |>
# select(tip_label, Attribute, Score) |>
# filter(!is.na(Attribute)) |>
# pivot_wider(
# names_from = 'Attribute', values_from = 'Score', values_fill = 0
# ) |>
# tibble::column_to_rownames(var = 'tip_label') |>
# as.matrix()
# no_annotated_tips_chr <- tip_data_annotated |>
# filter(!tip_label %in% rownames(annotated_tips)) |>
# pull(tip_label) |>
Expand All @@ -383,25 +384,22 @@ annotated_tips <- tip_data_annotated |>
# input_matrix <- input_matrix[tree$tip.label,]


## add scode for ASR

tree <- ape::keep.tip(tree, tip = rownames(annotated_tips))
tree <- reorder(tree, 'postorder')
input_matrix <- annotated_tips
# tree <- ape::keep.tip(tree, tip = rownames(annotated_tips))
# tree <- reorder(tree, 'postorder')
# input_matrix <- annotated_tips

## ------------------------------------------------------------------------------------------------------------------------
system.time({
fit <- fitMk(
tree = tree, x = input_matrix, model = 'ER', pi = 'fitzjohn',
lik.func = 'pruning', logscale = TRUE
)
})
asr <- ancr(object = fit, tips = TRUE)
res <- asr$ace
node_rows <- length(tree$tip.label) + 1:tree$Nnode
rownames(res)[node_rows] <- tree$node.label
# system.time({
# fit <- fitMk(
# tree = tree, x = input_matrix, model = 'ER', pi = 'fitzjohn',
# lik.func = 'pruning', logscale = TRUE
# )
# })
# asr <- ancr(object = fit, tips = TRUE)
# res <- asr$ace
# node_rows <- length(tree$tip.label) + 1:tree$Nnode
# rownames(res)[node_rows] <- tree$node.label

## ------------------------------------------------------------------------------------------------------------------------
# new_taxa_from_tips <- res[rownames(no_annotated_tips),] |>
# as.data.frame() |>
# tibble::rownames_to_column(var = 'tip_label') |>
Expand Down Expand Up @@ -442,37 +440,34 @@ rownames(res)[node_rows] <- tree$node.label



## ------------------------------------------------------------------------------------------------------------------------
ncbi_tree$Do(function(node) {
cond1 <- node$name %in% names(new_taxa_for_ncbi_tree_list)
cond2 <- is.null(node$attribute_tbl) || all(is.na(node$attribute_tbl))
if (cond1 && cond2) {
node$attribute_tbl <- new_taxa_for_ncbi_tree_list[[node$name]]
}
})
# ncbi_tree$Do(function(node) {
# cond1 <- node$name %in% names(new_taxa_for_ncbi_tree_list)
# cond2 <- is.null(node$attribute_tbl) || all(is.na(node$attribute_tbl))
# if (cond1 && cond2) {
# node$attribute_tbl <- new_taxa_for_ncbi_tree_list[[node$name]]
# }
# })


## ------------------------------------------------------------------------------------------------------------------------
ncbi_tree$Do(
function(node) taxPool(node = node, grp = Attribute_group_var, typ = Attribute_type_var ),
traversal = 'post-order'
)
ncbi_tree$Do(inh2, traversal = 'pre-order')
# ncbi_tree$Do(
# function(node) taxPool(node = node, grp = Attribute_group_var, typ = Attribute_type_var ),
# traversal = 'post-order'
# )
# ncbi_tree$Do(inh2, traversal = 'pre-order')


## ------------------------------------------------------------------------------------------------------------------------
output <- ncbi_tree$Get(
attribute = 'attribute_tbl', simplify = FALSE,
filterFun = function(node) node$name != 'ArcBac' && !is.null(node$attribute_tbl)
) |>
bind_rows() |>
discard(~ all(is.na(.x)))
min_thr <- 1 / length(unique(phys_data_ready$Attribute))
add_taxa_1 <- phys_data_ready |>
filter(!NCBI_ID %in% unique(output$NCBI_ID)) |>
discard(~ all(is.na(.x)))
add_taxa_2 <- new_taxa_for_ncbi_tree |>
filter(!NCBI_ID %in% unique(output$NCBI_ID)) |>
discard(~ all(is.na(.x)))
final_output <- bind_rows(list(output, add_taxa_1, add_taxa_2)) |>
filter(Score > min_thr)
# output <- ncbi_tree$Get(
# attribute = 'attribute_tbl', simplify = FALSE,
# filterFun = function(node) node$name != 'ArcBac' && !is.null(node$attribute_tbl)
# ) |>
# bind_rows() |>
# discard(~ all(is.na(.x)))
# min_thr <- 1 / length(unique(phys_data_ready$Attribute))
# add_taxa_1 <- phys_data_ready |>
# filter(!NCBI_ID %in% unique(output$NCBI_ID)) |>
# discard(~ all(is.na(.x)))
# add_taxa_2 <- new_taxa_for_ncbi_tree |>
# filter(!NCBI_ID %in% unique(output$NCBI_ID)) |>
# discard(~ all(is.na(.x)))
# final_output <- bind_rows(list(output, add_taxa_1, add_taxa_2)) |>
# filter(Score > min_thr)

0 comments on commit f11e25c

Please sign in to comment.