Skip to content

Commit

Permalink
add large files and update scrip[t
Browse files Browse the repository at this point in the history
  • Loading branch information
sdgamboa committed Nov 14, 2023
1 parent bfe522e commit bf8c421
Show file tree
Hide file tree
Showing 4 changed files with 71 additions and 1 deletion.
66 changes: 65 additions & 1 deletion misc/half_propagation/createStats.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ library(purrr)
library(tidyr)
library(ggplot2)
library(ggpubr)
library(ggtree)
library(data.tree)

ltp <- ltp()
tip_data <- ltp$tip_data
Expand Down Expand Up @@ -69,7 +71,7 @@ p_data <- map(p_l, ~ {
# habitat -----------------------------------------------------------------

h <- read.table(
file = 'misc/half_propagation/bugphyzz_export_2023-11-08.tsv',
file = 'misc/half_propagation/habitat.tsv',
header = TRUE, sep = '\t'
) |>
filter(!is.na(Attribute_group) & !is.na(Attribute)) |>
Expand Down Expand Up @@ -136,3 +138,65 @@ ggsave(
filename = 'misc/half_propagation/percent_plot.png',
plot = p1, width = 10, height = 9, dpi = 150, bg = 'white'
)


tree <- ltp$tree
node_data <- ltp$node_data
tip_data <- ltp$tip_data
data('tree_list')
ncbi_tree <- as.Node(tree_list)
ncbi_nodes <- ncbi_tree$Get(
attribute = 'name',
filterFun = function(node) node$name != 'ArcBac',
simplify = TRUE
) |>
unname()

ranks_var <- c('strain', 'species', 'genus', 'family', 'order', 'class', 'phylum', 'kingdom')
ranks_ncbi <- count(data.frame(r = sub('__\\d+$', '', ncbi_nodes)), r, name = 'NCBI') |>
mutate(
Rank = case_when(
r == 'c' ~ 'class',
r == 'f' ~ 'family',
r == 'g' ~ 'genus',
r == 'k' ~ 'kingdom',
r == 'o' ~ 'order',
r == 'p' ~ 'phylum',
r == 's' ~ 'species',
r == 't' ~ 'strain'
)
) |>
mutate(Rank = factor(Rank, levels = ranks_var)) |>
arrange(Rank) |>
select(-r)


ranks <- bind_rows(select(node_data, Rank), select(tip_data, Rank))
rownames(ranks) <- NULL
ranks_ltp <- ranks |>
mutate(Rank = ifelse(Rank == 'superkingdom', 'kingdom', Rank)) |>
filter(Rank %in% ranks_var) |>
mutate(Rank = factor(Rank, levels = ranks_var)) |>
count(Rank, name = 'LTP') |>
arrange(Rank)

rank_data <- full_join(ranks_ltp, ranks_ncbi, by = 'Rank')
rank_data_long <- rank_data |>
pivot_longer(
names_to = 'tree', values_to = 'n', cols = c(LTP, NCBI)
)

rank_data_long |>
ggplot(aes(Rank, n)) +
geom_col(aes(fill = tree), position = 'dodge') +
facet_wrap(.~Rank, scales = 'free', ncol = 2) +
# scale_y_continuous(breaks = function(x) seq(floor(min(x)), ceiling(max(x)), by = 1)) +
labs(
y = 'Number of taxids'
) +
theme_bw() +
theme(
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
axis.title.x = element_blank()
)
3 changes: 3 additions & 0 deletions misc/half_propagation/habitat.tsv
Git LFS file not shown
3 changes: 3 additions & 0 deletions misc/half_propagation/no_habitat.tsv
Git LFS file not shown
Binary file modified misc/half_propagation/percent_plot.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.

0 comments on commit bf8c421

Please sign in to comment.