Skip to content

Commit

Permalink
Add ltp3 to import the latest LTP tree, including genera added as add…
Browse files Browse the repository at this point in the history
…itional

tips with zero length. These are attached to the internal nodes containing genera.
  • Loading branch information
sdgamboa committed Nov 26, 2023
1 parent 1fc4fe6 commit 9cd459c
Show file tree
Hide file tree
Showing 7 changed files with 25,001 additions and 22,976 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ export(getSetWithoutIDs)
export(inh1)
export(inh2)
export(ltp)
export(ltp3)
export(mpa)
export(scores2Freq)
export(taxPool)
31 changes: 7 additions & 24 deletions R/trees.R
Original file line number Diff line number Diff line change
Expand Up @@ -203,10 +203,12 @@ ltp2 <- function(remove_zero_tips = TRUE) {

#' Get living tree project (LTP) tree or data v3
#'
#' \code{ltp3} gets the LTP tree or data.
#' \code{ltp3} gets the LTP tree or data. Use `ape::drop.tip` to remove
#' tips.
#'
#' @return A list with a phylo (the ltp tree) and a data.frame (tip_data)
#' objects.

#' @export
#'
ltp3 <- function() {
Expand All @@ -224,8 +226,7 @@ ltp3 <- function() {
file = tip_data_fname, header = TRUE, sep = '\t', row.names = NULL
) |>
purrr::modify(as.character) |>
as.data.frame() |>
dplyr::select(-.data$taxname)
as.data.frame()
rownames(tip_data) <- tip_data$tip_label

node_data <- utils::read.table(
Expand All @@ -234,29 +235,11 @@ ltp3 <- function() {
purrr::modify(as.character) |>
as.data.frame()

# tip_data <- tip_data |>
# dplyr::group_by(taxid) |>
# dplyr::slice_head(n = 1) |>
# dplyr::ungroup() |>
# as.data.frame()
# ntips4 <- nrow(tip_data)
# tree <- ape::keep.tip(phy = tree, tip = tip_data$tip_label)
# message('Dropping ', ntips3 - ntips4, ' tips because of duplicated taxids.')
# tip_data <- tip_data[tree$tip.label,]
# message('Tips remaining: ', length(tree$tip.label))

# tip_data <- tip_data |>
# dplyr::mutate(
# NCBI_ID = dplyr::case_when(
# .data$Rank == 'genus' ~ paste0('g__', taxid),
# .data$Rank == 'species' ~ paste0('s__', taxid),
# .data$Rank == 'strain' ~ paste0('t__', taxid),
# TRUE ~ NA
# )
# )
genus_tips <- grep('g__', tip_data$tip_label, value = TRUE)

list(
tree = tree, tip_data = tip_data, node_data = node_data
tree = tree, tip_data = tip_data, node_data = node_data,
genus_tips = genus_tips
)
}

Expand Down
2 changes: 1 addition & 1 deletion inst/extdata/LTP_all_08_2023.newick

Large diffs are not rendered by default.

6,314 changes: 3,157 additions & 3,157 deletions inst/extdata/LTP_all_08_2023.node_data

Large diffs are not rendered by default.

41,571 changes: 21,779 additions & 19,792 deletions inst/extdata/LTP_all_08_2023.tip_data

Large diffs are not rendered by default.

42 changes: 40 additions & 2 deletions inst/scripts/get_living_tree_2023.R
Original file line number Diff line number Diff line change
Expand Up @@ -179,11 +179,13 @@ all(tree$tip.label %in% tip_data$tip_label)
# tip_data$taxid
tip_data$Taxon_name <- taxizedb::taxid2name(tip_data$taxid, db = 'ncbi')
tip_data$Rank <- taxizedb::taxid2rank(tip_data$taxid, db = 'ncbi')
tip_data$rank <- NULL


# node_data$taxid
node_data$Taxon_name <- taxizedb::taxid2name(node_data$taxid, db = 'ncbi')
node_data$Rank <- taxizedb::taxid2rank(node_data$taxid, db = 'ncbi')
node_data$rank <- NULL


# Add genus information ---------------------------------------------------
Expand All @@ -200,20 +202,56 @@ names(node_data_g) <- paste0('g__', names(node_data_g))
tree_extended <- tree
system.time({
for (i in seq_along(node_data_g)) {
message('Adding ', names(node_data_g)[i], '- ', i)
tree_extended <- bind.tip(
tree = tree_extended, edge.length = 0, where = node_data_g[i],
tip.label = names(node_data_g)[i]
)
}
})

extra_tip_data <- data.frame(tip_label = grep('g__', tree_extended$tip.label, value = TRUE))
extra_tip_data$accession <- NA
extra_tip_data$taxid <- sub('g__', '', extra_tip_data$tip_label)
extra_tip_data$NCBI_ID <- extra_tip_data$tip_label
extra_tip_data$Rank <- 'genus'
extra_tip_taxonomy <- taxizedb::classification(
x = unique(extra_tip_data$taxid), db = 'ncbi'
)
extra_tip_new_taxonomy <- purrr::map(extra_tip_taxonomy, ~ {
x <- .x
x
x <- x[which(x$rank %in% taxonomic_ranks),]
m <- matrix(x$id, byrow = TRUE, nrow = 1)
colnames(m) <- x$rank
d <- as.data.frame(m)
colnames(d) <- sub("$", "_taxid", colnames(d))
d
}) |>
bind_rows(.id = 'taxid') |>
relocate(
taxid, kingdom_taxid = superkingdom_taxid, phylum_taxid, class_taxid,
order_taxid, family_taxid, genus_taxid
) |>
discard(~ all(is.na(.x)))

extra_tip_data <- left_join(extra_tip_data, extra_tip_new_taxonomy, by = 'taxid')
extra_tip_data$Taxon_name <- taxizedb::taxid2name(extra_tip_data$taxid, db = 'ncbi')
rownames(extra_tip_data) <- extra_tip_data$tip_label
tip_data_extended <- bind_rows(tip_data, extra_tip_data)
tip_data_extended <- tip_data_extended[tree_extended$tip.label,]


# trimmed_tree <- drop.tip(phy = tree_extended, tip = extra_tip_data$tip_label)


# Export data -------------------------------------------------------------
tree_fname <- file.path('inst', 'extdata', 'LTP_all_08_2023.newick')
ape::write.tree(tree, tree_fname)
ape::write.tree(tree_extended, tree_fname)

tip_data_fname <- file.path('inst', 'extdata', 'LTP_all_08_2023.tip_data')
write.table(
tip_data, tip_data_fname, sep = '\t', quote = TRUE,
tip_data_extended, tip_data_fname, sep = '\t', quote = TRUE,
row.names = FALSE
)

Expand Down
16 changes: 16 additions & 0 deletions man/ltp3.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 9cd459c

Please sign in to comment.